openc_bot 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.travis.yml +8 -0
- data/CHANGELOG.md +2 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +253 -0
- data/Rakefile +14 -0
- data/bin/openc_bot +13 -0
- data/create_bot.sh +30 -0
- data/create_company_bot.sh +16 -0
- data/create_simple_licence_bot.sh +31 -0
- data/db/.gitkeep +0 -0
- data/examples/basic/.gitignore +3 -0
- data/examples/basic/Gemfile +7 -0
- data/examples/basic/config.yml +21 -0
- data/examples/basic/lib/basic.rb +88 -0
- data/examples/basic_with_proxy/Gemfile +7 -0
- data/examples/basic_with_proxy/config.yml +21 -0
- data/examples/basic_with_proxy/lib/basic_with_proxy.rb +103 -0
- data/examples/bot_with_simple_iterator/Gemfile +6 -0
- data/examples/bot_with_simple_iterator/config.yml +21 -0
- data/examples/bot_with_simple_iterator/lib/bot_with_simple_iterator.rb +112 -0
- data/examples/company_fetchers/basic.rb +49 -0
- data/lib/monkey_patches/mechanize.rb +53 -0
- data/lib/openc_bot.rb +89 -0
- data/lib/openc_bot/bot_data_validator.rb +18 -0
- data/lib/openc_bot/company_fetcher_bot.rb +40 -0
- data/lib/openc_bot/exceptions.rb +17 -0
- data/lib/openc_bot/helpers/_csv.rb +10 -0
- data/lib/openc_bot/helpers/alpha_search.rb +73 -0
- data/lib/openc_bot/helpers/dates.rb +33 -0
- data/lib/openc_bot/helpers/html.rb +8 -0
- data/lib/openc_bot/helpers/incremental_search.rb +106 -0
- data/lib/openc_bot/helpers/register_methods.rb +205 -0
- data/lib/openc_bot/helpers/text.rb +18 -0
- data/lib/openc_bot/incrementers.rb +2 -0
- data/lib/openc_bot/incrementers/base.rb +214 -0
- data/lib/openc_bot/incrementers/common.rb +47 -0
- data/lib/openc_bot/tasks.rb +385 -0
- data/lib/openc_bot/templates/README.md +35 -0
- data/lib/openc_bot/templates/bin/export_data +28 -0
- data/lib/openc_bot/templates/bin/fetch_data +23 -0
- data/lib/openc_bot/templates/bin/verify_data +1 -0
- data/lib/openc_bot/templates/config.yml +21 -0
- data/lib/openc_bot/templates/lib/bot.rb +43 -0
- data/lib/openc_bot/templates/lib/company_fetcher_bot.rb +95 -0
- data/lib/openc_bot/templates/lib/simple_bot.rb +67 -0
- data/lib/openc_bot/templates/spec/bot_spec.rb +11 -0
- data/lib/openc_bot/templates/spec/simple_bot_spec.rb +11 -0
- data/lib/openc_bot/templates/spec/spec_helper.rb +13 -0
- data/lib/openc_bot/version.rb +3 -0
- data/lib/simple_openc_bot.rb +289 -0
- data/openc_bot.gemspec +35 -0
- data/schemas/company-schema.json +112 -0
- data/schemas/includes/address.json +23 -0
- data/schemas/includes/base-statement.json +27 -0
- data/schemas/includes/company.json +14 -0
- data/schemas/includes/filing.json +20 -0
- data/schemas/includes/license-data.json +27 -0
- data/schemas/includes/officer.json +14 -0
- data/schemas/includes/previous_name.json +11 -0
- data/schemas/includes/share-parcel-data.json +67 -0
- data/schemas/includes/share-parcel.json +60 -0
- data/schemas/includes/subsidiary-relationship-data.json +52 -0
- data/schemas/includes/total-shares.json +10 -0
- data/schemas/licence-schema.json +21 -0
- data/schemas/share-parcel-schema.json +21 -0
- data/schemas/subsidiary-relationship-schema.json +19 -0
- data/spec/dummy_classes/foo_bot.rb +4 -0
- data/spec/lib/bot_data_validator_spec.rb +69 -0
- data/spec/lib/company_fetcher_bot_spec.rb +93 -0
- data/spec/lib/exceptions_spec.rb +25 -0
- data/spec/lib/helpers/alpha_search_spec.rb +173 -0
- data/spec/lib/helpers/dates_spec.rb +65 -0
- data/spec/lib/helpers/incremental_search_spec.rb +471 -0
- data/spec/lib/helpers/register_methods_spec.rb +558 -0
- data/spec/lib/helpers/text_spec.rb +50 -0
- data/spec/lib/openc_bot/db/.gitkeep +0 -0
- data/spec/lib/openc_bot/incrementers/common_spec.rb +83 -0
- data/spec/lib/openc_bot_spec.rb +116 -0
- data/spec/schemas/company-schema_spec.rb +676 -0
- data/spec/simple_openc_bot_spec.rb +302 -0
- data/spec/spec_helper.rb +19 -0
- metadata +300 -0
@@ -0,0 +1,205 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'json-schema'
|
3
|
+
require 'active_support/core_ext'
|
4
|
+
|
5
|
+
module OpencBot
|
6
|
+
module Helpers
|
7
|
+
module RegisterMethods
|
8
|
+
|
9
|
+
# Tells whether the receiver has opted in to alpha search.
#
# Returns false when no USE_ALPHA_SEARCH constant is defined on the receiver,
# otherwise returns the value of that constant.
def use_alpha_search
  return false unless const_defined?('USE_ALPHA_SEARCH')

  const_get('USE_ALPHA_SEARCH')
end
|
12
|
+
|
13
|
+
# Checks whether the ocdata table already holds a row whose primary key
# column equals +uid+.
#
# Returns true or false.
def datum_exists?(uid)
  matches = select("ocdata.#{primary_key_name} FROM ocdata WHERE #{primary_key_name} = ? LIMIT 1", uid)
  matches.first ? true : false
end
|
16
|
+
|
17
|
+
# Fetches and saves data. Uses an alpha search when +use_alpha_search+ is
# truthy and an incremental search otherwise. Override this method for a
# different kind of import, e.g. from a CSV file.
def fetch_data
  return fetch_data_via_alpha_search if use_alpha_search

  fetch_data_via_incremental_search
end
|
27
|
+
|
28
|
+
# Selects every row of the ocdata table and yields each one to the caller's
# block after passing it through +post_process+ with skip_nulls set to true.
def export_data
  rows = select("ocdata.* from ocdata")
  rows.each { |row| yield post_process(row, true) }
end
|
34
|
+
|
35
|
+
# Returns whatever the HTTP client's +get_content+ gives back for the
# registry URL of +company_number+.
def fetch_registry_page(company_number)
  http = _client
  http.get_content(registry_url(company_number))
end
|
38
|
+
|
39
|
+
# Runs +all_data+ through +prepare_for_saving+ and hands the result to
# +insert_or_update+, keyed on the primary key column.
# +options+ is accepted but not used by this implementation.
def prepare_and_save_data(all_data, options = {})
  saveable = prepare_for_saving(all_data)
  key_columns = [primary_key_name]
  insert_or_update(key_columns, saveable)
end
|
43
|
+
|
44
|
+
# Name of the primary key column: the receiver's PRIMARY_KEY_NAME constant
# when one is defined, otherwise :uid.
def primary_key_name
  return :uid unless const_defined?('PRIMARY_KEY_NAME')

  const_get('PRIMARY_KEY_NAME')
end
|
47
|
+
|
48
|
+
# Sensible default: prefers the computed registry URL for +uid+ and falls
# back to the one looked up from the database when the computed one is
# nil or false.
def registry_url(uid)
  computed = computed_registry_url(uid)
  return computed if computed

  registry_url_from_db(uid)
end
|
52
|
+
|
53
|
+
# Stub: always returns nil. Override in the including module when the
# registry URL can be computed from +uid+.
def computed_registry_url(uid)
  nil
end
|
56
|
+
|
57
|
+
# Stub: always returns nil. Override in the including module when the
# registry URL is stored in the database and can be read from there.
def registry_url_from_db(uid)
  nil
end
|
60
|
+
|
61
|
+
# Validates +entity_datum+ (ignoring its :data entry) and saves it only when
# validation reports no errors.
#
# Returns the result of +prepare_and_save_data+, or nil when the datum is
# invalid and nothing was saved.
def save_entity(entity_datum)
  problems = validate_datum(entity_datum.except(:data))
  prepare_and_save_data(entity_datum) if problems.blank?
end
|
66
|
+
|
67
|
+
# Behaves like +save_entity+ but raises OpencBot::RecordInvalid when the
# record is not valid. The exception is constructed with the validation
# errors.
def save_entity!(entity_datum)
  problems = validate_datum(entity_datum.except(:data))
  if problems.blank?
    prepare_and_save_data(entity_datum)
  else
    raise OpencBot::RecordInvalid.new(problems)
  end
end
|
75
|
+
|
76
|
+
# Value of the receiver's SCHEMA_NAME constant, or nil when no such constant
# is defined.
def schema_name
  const_get('SCHEMA_NAME') if const_defined?('SCHEMA_NAME')
end
|
79
|
+
|
80
|
+
# Yields the primary key value of each stale row in ocdata. A row is stale
# when its retrieved_at is NULL or more than 30 days before today.
#
# stale_count - maximum number of rows to select (defaults to 1000).
#
# If the query fails because the retrieved_at column does not exist, the
# column is added and the query is retried ONCE; if the same error comes
# back, or for any other SQLite3::SQLException, the error is re-raised
# (previously the retry was unbounded and could loop forever).
def stale_entry_uids(stale_count=nil)
  stale_count ||= 1000
  column_added = false
  begin
    sql_query = "ocdata.* from ocdata WHERE retrieved_at IS NULL OR strftime('%s', retrieved_at) < strftime('%s', '#{Date.today - 30}') LIMIT #{stale_count.to_i}"
    select(sql_query).each do |res|
      yield res[primary_key_name.to_s]
    end
  rescue SQLite3::SQLException => e
    # Only the missing-column case is recoverable, and only once.
    raise if column_added || !e.message[/no such column: retrieved_at/]

    sqlite_magic_connection.add_columns('ocdata', ['retrieved_at'])
    column_added = true
    retry
  end
end
|
94
|
+
|
95
|
+
# Runs a full update: fetches data, refreshes stale entries, then records a
# run report with status 'success'. +options+ is accepted but not used here.
#
# Returns the result of +save_run_report+.
def update_data(options = {})
  fetch_data
  update_stale
  save_run_report(status: 'success')
end
|
100
|
+
|
101
|
+
# Updates the datum identified by +uid+ (e.g. a company_number) by fetching
# new data, processing it and saving it. It relies on #fetch_datum and
# #process_datum being implemented by the module that includes this one.
#
# uid                   - primary key value of the datum to refresh.
# output_as_json        - when false (default) the processed data hash is
#                         returned; when true the processed data is printed
#                         to STDOUT as JSON instead (e.g. for a rake task
#                         whose output is consumed by another application).
# replace_existing_data - accepted for compatibility; not used by this body.
#
# Returns nil when #fetch_datum returns nothing, and also when an error is
# rescued. Rescued errors are printed as JSON if +output_as_json+ is true and
# otherwise dropped silently.
#
# Only StandardError and ScriptError (e.g. NotImplementedError) are rescued.
# Interrupt, SystemExit and NoMemoryError now propagate rather than being
# swallowed by a blanket `rescue Exception`.
def update_datum(uid, output_as_json=false, replace_existing_data=false)
  raw_data = fetch_datum(uid)
  return unless raw_data

  default_options = { primary_key_name => uid, :retrieved_at => Time.now }
  processed_data = default_options.merge(process_datum(raw_data))
  # Save the original raw data alongside the processed fields, as we may not
  # be extracting everything from it yet. (save_entity takes care of
  # converting Arrays and Hashes to JSON before storage.)
  save_entity(processed_data.merge(:data => raw_data))
  return processed_data unless output_as_json

  puts processed_data.to_json
rescue StandardError, ScriptError => e
  output_json_error_message(e) if output_as_json
end
|
127
|
+
|
128
|
+
# Calls +update_datum+ for every uid yielded by +stale_entry_uids+.
#
# stale_count - passed straight through to +stale_entry_uids+.
def update_stale(stale_count=nil)
  stale_entry_uids(stale_count) { |uid| update_datum(uid) }
end
|
134
|
+
|
135
|
+
# Validates +record+ (serialized to JSON) against the JSON schema file named
# after +schema_name+, located in the schemas directory four levels above
# this file.
#
# Returns the result of JSON::Validator.fully_validate, called with
# :errors_as_objects => true.
def validate_datum(record)
  schema_path = File.expand_path("../../../../schemas/#{schema_name}.json", __FILE__)
  JSON::Validator.fully_validate(schema_path, record.to_json, :errors_as_objects => true)
end
|
142
|
+
|
143
|
+
# Turns a stored row back into Ruby objects. Many fields hold serialized
# JSON, so the row (minus its :data entry) is passed to
# +convert_json_to_ruby+.
#
# skip_nulls - forwarded to +convert_json_to_ruby+.
def post_process(row_hash, skip_nulls=false)
  without_raw = row_hash.except(:data)
  convert_json_to_ruby(without_raw, skip_nulls)
end
|
147
|
+
|
148
|
+
private
|
149
|
+
# Utility for reporting an error as JSON on STDOUT (where it can be handled
# by the importer). The payload has a single 'error' key holding the
# exception's class name, message and backtrace.
def output_json_error_message(err_obj)
  details = {
    'klass' => err_obj.class.to_s,
    'message' => err_obj.message,
    'backtrace' => err_obj.backtrace
  }
  puts({ 'error' => details }.to_json)
end
|
155
|
+
|
156
|
+
# Builds a copy of +raw_data_hash+ whose values can be stored as strings in
# sqlite: Array and Hash values become JSON, Date/Time/DateTime values become
# ISO 8601 strings, everything else is left untouched. The copy is made with
# +deep_clone_hash+ and all changes are applied to that copy.
def prepare_for_saving(raw_data_hash)
  prepared_data = deep_clone_hash(raw_data_hash)
  prepared_data.keys.each do |key|
    value = prepared_data[key]
    if value.is_a?(Array) || value.is_a?(Hash)
      prepared_data[key] = value.to_json
    elsif value.is_a?(Date) || value.is_a?(Time) || value.is_a?(DateTime)
      prepared_data[key] = value.iso8601
    end
  end
  prepared_data
end
|
169
|
+
|
170
|
+
# Returns the memoized HTTP client, building it on first use.
#
# On the first call the following keys are removed from +options+ and
# applied to a new HTTPClient:
#   :proxy                 - passed to HTTPClient.new
#   :skip_ssl_verification - when truthy, sets verify_mode to VERIFY_NONE
#   :user_agent            - assigned to agent_name (nil when not given)
#   :ssl_version           - assigned to the ssl config when present
#   :ssl_certificate       - added as a trusted CA when present
# Later calls ignore +options+ and return the existing client.
def _client(options={})
  return @client if @client

  @client = HTTPClient.new(options.delete(:proxy))
  if options.delete(:skip_ssl_verification)
    @client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end
  @client.agent_name = options.delete(:user_agent)
  if options[:ssl_version]
    @client.ssl_config.ssl_version = options.delete(:ssl_version)
  end
  ssl_certificate = options.delete(:ssl_certificate)
  @client.ssl_config.add_trust_ca(ssl_certificate) if ssl_certificate
  @client
end
|
181
|
+
|
182
|
+
# Returns a deep copy of +given_hash+ made by a Marshal dump/load round trip,
# so nested objects in the copy are independent of the original.
def deep_clone_hash(given_hash)
  serialized = Marshal.dump(given_hash)
  Marshal.load(serialized)
end
|
185
|
+
|
186
|
+
# Converts JSON-looking string values of +data_hash+ back into Ruby objects,
# modifying +data_hash+ in place and returning it.
#
# A value is parsed only when it is a String matching the pattern below
# (an opening brace/bracket followed by a double quote, or an empty [] / {}).
# Values that fail to parse are left as they are. Parsed Hashes, and Hashes
# directly inside parsed Arrays, are given indifferent access.
#
# skip_nulls - when true, keys whose value is nil are removed from the hash.
def convert_json_to_ruby(data_hash, skip_nulls=false)
  data_hash.each do |key, raw|
    decoded = nil
    begin
      decoded = JSON.parse(raw) if raw.is_a?(String) && raw[/^[\{\[]+\"|^\[\]$|^{}$/]
    rescue StandardError
      # Not valid JSON after all: keep the stored string unchanged.
      decoded = nil
    end

    if decoded.is_a?(Hash)
      decoded = decoded.with_indifferent_access
    elsif decoded.is_a?(Array)
      decoded.collect! { |e| e.is_a?(Hash) ? e.with_indifferent_access : e }
    end

    if skip_nulls && raw.nil?
      data_hash.delete(key)
    elsif decoded
      data_hash[key] = decoded
    end
  end
end
|
202
|
+
|
203
|
+
end
|
204
|
+
end
|
205
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
module OpencBot
|
3
|
+
module Helpers
|
4
|
+
# Text clean-up helpers. The module extends itself, so the methods can be
# called directly on the module as well as mixed in.
module Text
  extend self

  # Replaces every UTF-8 non-breaking space (bytes C2 A0) in +raw_text+ with
  # an ordinary space. A nil or false argument is returned unchanged.
  def normalise_utf8_spaces(raw_text)
    return raw_text unless raw_text

    raw_text.gsub(/\xC2\xA0/, ' ')
  end

  # Normalises non-breaking spaces, strips leading and trailing whitespace
  # and collapses each run of whitespace into a single space.
  # A nil or false argument is returned unchanged.
  def strip_all_spaces(text)
    return text unless text

    normalise_utf8_spaces(text).strip.gsub(/\s+/, ' ')
  end
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,214 @@
|
|
1
|
+
require 'backports/2.0.0/enumerable/lazy'
|
2
|
+
require 'json'
|
3
|
+
module OpencBot
|
4
|
+
class BaseIncrementer
|
5
|
+
|
6
|
+
# Sets up the incrementer.
#
# name - identifier used in progress messages and, via +db_name+, in the
#        position file name.
# opts - options hash (also kept whole in @opts). Keys read here:
#        :expected_count, :app_path, :show_progress (treated as true when
#        absent or nil), :reset_iterator, :max_iterations.
def initialize(name, opts={})
  @opts = opts
  @name = name
  @count = 0
  @expected_count = opts[:expected_count]
  @app_path = opts[:app_path]
  @reset_iterator = opts[:reset_iterator]
  @max_iterations = opts[:max_iterations]
  requested = opts[:show_progress]
  @show_progress = requested.nil? ? true : (requested || false)
end
|
16
|
+
|
17
|
+
# Wraps the default constructor so that opts[:app_path] defaults to the
# parent directory of the directory holding the file that called .new.
# The calling file is taken from the first entry of +caller+, up to its
# first ":". An options hash is appended when only the name was given.
def self.new(*args)
  caller_file = caller[0].partition(":").first
  app_root = File.expand_path(File.join(File.dirname(caller_file), ".."))
  args << {} if args.count == 1
  args[1][:app_path] ||= app_root
  super(*args)
end
|
24
|
+
|
25
|
+
# Prints a progress line for this iterator to STDOUT, unless progress output
# is switched off (@show_progress is falsy).
def log_progress(percent)
  return unless @show_progress

  puts "Iterator #{@name} progress: #{percent}%"
end
|
28
|
+
|
29
|
+
# Percentage of the expected items processed so far, rounded to two decimal
# places. Returns nil when no expected count is known.
def progress_percent
  return unless @expected_count

  ratio = @count.to_f / @expected_count
  (ratio * 100).round(2)
end
|
32
|
+
|
33
|
+
# Returns a lazy enumerator over everything +increment_yielder+ produces.
#
# For each item, in this order: the item (as JSON when it is a Hash) is
# written to the position file, the item is yielded, @count is incremented
# and progress is logged. Once the source is exhausted the position file is
# reset.
def each
  stream = Enumerator.new do |yielder|
    increment_yielder do |result|
      write_current(result.is_a?(Hash) ? result.to_json : result)
      yielder.yield(result)
      @count += 1
      log_progress(progress_percent)
    end
    reset_current
  end
  stream.lazy
end
|
49
|
+
|
50
|
+
# Returns the enumerator to iterate over: +each+, resumed from the saved
# position unless @reset_iterator is set, and capped at @max_iterations
# items when that option is set.
def resumable
  sequence = @reset_iterator ? each : resuming_enum(each)
  @max_iterations ? sequence.take(@max_iterations) : sequence
end
|
56
|
+
|
57
|
+
# Returns +enum+ fast-forwarded to the position saved in the position file.
#
# When there is no saved position (file missing or empty) +enum+ is returned
# untouched and progress reporting is left as configured. Previously
# @show_progress was forced to false in that case and never restored.
#
# Otherwise items are dropped until one matches the saved position; that item
# is the first one produced again. Progress output is suppressed while
# skipping and restored to its configured value once the start point is
# found. Items are compared in the form +each+ writes them: JSON for Hashes,
# +to_s+ for everything else (previously Hashes were compared via +to_s+ and
# could never match).
def resuming_enum(enum)
  start_from = read_current
  return enum if start_from.nil? || start_from == ""

  preset_show_progress = @show_progress
  @show_progress = false
  enum.drop_while do |x|
    written_form = x.is_a?(Hash) ? x.to_json : x.to_s
    found_start_point = (written_form == start_from)
    @show_progress = preset_show_progress && found_start_point
    !found_start_point
  end
end
|
70
|
+
|
71
|
+
# Path of the file that stores the iterator position: inside the "db"
# directory under @app_path, named after +db_name+.
def position_file_name
  [@app_path, "db", "#{db_name}-iterator-position.txt"].join("/")
end
|
74
|
+
|
75
|
+
# Base name used for this incrementer's files: simply the name the
# incrementer was created with.
def db_name
  @name
end
|
78
|
+
|
79
|
+
# Clears the saved position by overwriting the position file with an empty
# string. A file is used, rather than SQL, for speed reasons.
def reset_current
  File.write(position_file_name, "")
end
|
85
|
+
|
86
|
+
# Saves the current position by overwriting the position file with the
# string form of +val+.
def write_current(val)
  File.write(position_file_name, val.to_s)
end
|
91
|
+
|
92
|
+
# Returns the contents of the position file, or nil when the file does not
# exist.
def read_current
  File.read(position_file_name)
rescue Errno::ENOENT
  nil
end
|
101
|
+
end
|
102
|
+
|
103
|
+
class ManualIncrementer < OpencBot::BaseIncrementer
|
104
|
+
|
105
|
+
include ScraperWiki
|
106
|
+
|
107
|
+
ITEMS_TABLE = "items"
|
108
|
+
|
109
|
+
# Runs the given block inside a single database transaction, yielding this
# incrementer to it.
#
# The transaction is committed when the block finishes normally. If the
# block raises, the transaction is rolled back and the error is re-raised;
# previously the transaction was left open.
#
# Returns the result of executing COMMIT.
def single_transaction
  sqlite_magic_connection.execute("BEGIN TRANSACTION")
  begin
    yield(self)
  rescue Exception
    # Cleanup only: whatever was raised is re-raised unchanged.
    sqlite_magic_connection.execute("ROLLBACK")
    raise
  end
  sqlite_magic_connection.execute("COMMIT")
end
|
114
|
+
|
115
|
+
# Sets up the incrementer and makes sure its backing table exists.
#
# opts[:fields] is required (a RuntimeError is raised after the superclass
# initializer runs if it is nil). The fields become the columns of the items
# table, alongside an integer primary key "_id", and a unique index across
# all of the fields is created if it does not exist yet.
def initialize(name, opts={})
  super(name, opts)
  fields = opts[:fields]
  raise "Fields must be defined for this Record" if fields.nil?

  column_list = fields.join(',')
  sqlite_magic_connection.execute(
    "CREATE TABLE IF NOT EXISTS #{ITEMS_TABLE} (#{column_list}, _id INTEGER PRIMARY KEY)"
  )
  sqlite_magic_connection.execute(
    "CREATE UNIQUE INDEX IF NOT EXISTS #{fields.join('_')} ON #{ITEMS_TABLE} (#{column_list})"
  )
end
|
124
|
+
|
125
|
+
# Overrides the default from the ScraperWiki gem: the connection points at
# "<db_name>.db" in the "db" directory under @app_path, and is created once
# and then reused.
def sqlite_magic_connection
  relative_path = File.join(@app_path, 'db', "#{db_name}.db")
  db = File.expand_path(relative_path)
  @sqlite_magic_connection ||= SqliteMagic::Connection.new(db)
end
|
130
|
+
|
131
|
+
# Yields rows of the items table batch by batch until a batch comes back
# empty.
#
# start_row - optional row hash; when given, iteration starts at that row's
#             "_id" (so that row itself is yielded again).
#
# Before iterating, @expected_count is set from +count_all_items+ and @count
# from +count_processed_items+. After each yielded row the next batch start
# is moved to just past that row's "_id".
def increment_yielder(start_row=nil)
  start_id = start_row && start_row["_id"].to_i
  @expected_count = count_all_items
  @count = count_processed_items(start_id)
  loop do
    batch = read_batch(start_id)
    batch.each do |row|
      yield row
      start_id = row["_id"].to_i + 1
    end
    raise StopIteration if batch.empty?
  end
end
|
143
|
+
|
144
|
+
# Reads the "populated" flag from the first row of the misc table.
#
# Returns true when the stored value is the string "true", false for any
# other non-nil value, and nil when the value is nil or the misc table does
# not exist.
def populated
  flag = select("populated FROM misc").first['populated']
  flag && flag == "true"
rescue SqliteMagic::NoSuchTable
  nil
end
|
151
|
+
|
152
|
+
# Stores the "populated" flag as the string "true" in the misc table.
# Only the values true and "true" have any effect; anything else is ignored.
def populated=(val)
  return unless val == "true" || val == true

  save_sqlite([:populated], {:populated => "true"}, "misc")
end
|
157
|
+
|
158
|
+
# Marks the incrementer as populated and returns the enumerator from +each+.
# Any arguments are accepted and ignored.
def enum(*_args)
  self.populated = true
  each
end
|
162
|
+
|
163
|
+
# Inserts or updates +val+ (a hash of column => value) in the items table,
# using all of the hash's keys as the unique keys and passing
# :update_unique_keys => true.
def add_row(val)
  connection = sqlite_magic_connection
  connection.insert_or_update(val.keys, val, ITEMS_TABLE, :update_unique_keys => true)
end
|
167
|
+
|
168
|
+
# Counts the items whose "_id" is lower than +start_id+.
#
# Returns 0 when +start_id+ is nil or false, or when the items table does
# not exist.
def count_processed_items(start_id)
  return 0 unless start_id

  row = select("count(*) as count FROM #{ITEMS_TABLE} WHERE _id < #{start_id}").first
  row && row['count']
rescue SqliteMagic::NoSuchTable
  0
end
|
180
|
+
|
181
|
+
# Total number of rows in the items table, or nil when the table does not
# exist.
def count_all_items
  select("count(*) as count FROM #{ITEMS_TABLE}").first['count']
rescue SqliteMagic::NoSuchTable
  nil
end
|
187
|
+
|
188
|
+
# Selects the next batch of up to 100 rows from the items table.
#
# start_id - when given, only rows with "_id" >= start_id are returned.
#
# Rows are explicitly ordered by "_id". The caller pages through the table by
# asking for "_id >= last seen + 1", which is only correct when each batch
# arrives in ascending "_id" order; without ORDER BY, SQL leaves the row
# order unspecified.
def read_batch(start_id=nil)
  sql = "* FROM #{ITEMS_TABLE}"
  sql << " WHERE _id >= #{start_id}" if start_id
  sql << " ORDER BY _id LIMIT 100"
  select(sql)
end
|
196
|
+
|
197
|
+
# Overrides the superclass definition with a more efficient version: rather
# than skipping through +enum+, a new lazy enumerator is built that starts
# reading at the saved row.
#
# The position file is read once per call (previously it was read up to
# three times). When it is missing or empty, or its JSON content parses to
# nil or false, +enum+ is returned unchanged.
#
# For each row of the new enumerator: the row is written to the position
# file as JSON, yielded, @count is incremented and progress is logged. The
# position file is reset once the rows are exhausted.
def resuming_enum(enum)
  saved_position = read_current
  return enum if saved_position.nil? || saved_position == ""

  current_row = JSON.parse(saved_position)
  return enum unless current_row

  Enumerator.new do |yielder|
    increment_yielder(current_row) do |result|
      write_current(result.to_json)
      yielder.yield(result)
      @count += 1
      log_progress(progress_percent)
    end
    reset_current
  end.lazy
end
|
213
|
+
end
|
214
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module OpencBot
|
2
|
+
# Incrementer that yields each integer from :start_val (default 0) up to and
# including :end_val.
class NumericIncrementer < OpencBot::BaseIncrementer
  # name - incrementer name, passed to the superclass.
  # opts - options; :end_val is required (a RuntimeError is raised when it is
  #        missing), :start_val defaults to 0. All options are also passed on
  #        to the superclass.
  def initialize(name, opts={})
    raise "You must specify an end_val for a NumericIncrementer" if ! opts[:end_val]
    @start_val = opts[:start_val] || 0
    @end_val = opts[:end_val]
    super(name, opts)
  end

  # Yields every value from @start_val to @end_val inclusive.
  #
  # The expected count is the number of values in that inclusive range.
  # Previously it was set to @end_val itself, which made the reported
  # progress wrong whenever start_val was not 1.
  def increment_yielder
    @expected_count = @end_val - @start_val + 1
    i = @start_val
    while i <= @end_val
      yield i
      i += 1
    end
  end
end
|
22
|
+
|
23
|
+
# Incrementer that yields every string of a fixed length made from the 36
# characters 0-9 and a-z.
class AsciiIncrementer < OpencBot::BaseIncrementer
  # name - incrementer name, passed to the superclass.
  # opts - options; :size is the length of the generated strings (default 3).
  #        All options are also passed on to the superclass.
  def initialize(name, opts={})
    @size = opts[:size] || 3
    super(name, opts)
  end

  # Yields each repeated permutation of the alphabet, joined into a string.
  #
  # The expected count is computed as 36 ** size. This matches the values that
  # used to be hard-coded for sizes 1 to 4 and also covers other sizes, for
  # which the expected count was previously left unset.
  def increment_yielder
    alnum = (0...36).map { |i| i.to_s(36) } # 0...z
    @expected_count = alnum.size ** @size
    alnum.repeated_permutation(@size) do |perm|
      yield perm.join
    end
  end
end
|
47
|
+
end
|