openc_bot 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.travis.yml +8 -0
- data/CHANGELOG.md +2 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +253 -0
- data/Rakefile +14 -0
- data/bin/openc_bot +13 -0
- data/create_bot.sh +30 -0
- data/create_company_bot.sh +16 -0
- data/create_simple_licence_bot.sh +31 -0
- data/db/.gitkeep +0 -0
- data/examples/basic/.gitignore +3 -0
- data/examples/basic/Gemfile +7 -0
- data/examples/basic/config.yml +21 -0
- data/examples/basic/lib/basic.rb +88 -0
- data/examples/basic_with_proxy/Gemfile +7 -0
- data/examples/basic_with_proxy/config.yml +21 -0
- data/examples/basic_with_proxy/lib/basic_with_proxy.rb +103 -0
- data/examples/bot_with_simple_iterator/Gemfile +6 -0
- data/examples/bot_with_simple_iterator/config.yml +21 -0
- data/examples/bot_with_simple_iterator/lib/bot_with_simple_iterator.rb +112 -0
- data/examples/company_fetchers/basic.rb +49 -0
- data/lib/monkey_patches/mechanize.rb +53 -0
- data/lib/openc_bot.rb +89 -0
- data/lib/openc_bot/bot_data_validator.rb +18 -0
- data/lib/openc_bot/company_fetcher_bot.rb +40 -0
- data/lib/openc_bot/exceptions.rb +17 -0
- data/lib/openc_bot/helpers/_csv.rb +10 -0
- data/lib/openc_bot/helpers/alpha_search.rb +73 -0
- data/lib/openc_bot/helpers/dates.rb +33 -0
- data/lib/openc_bot/helpers/html.rb +8 -0
- data/lib/openc_bot/helpers/incremental_search.rb +106 -0
- data/lib/openc_bot/helpers/register_methods.rb +205 -0
- data/lib/openc_bot/helpers/text.rb +18 -0
- data/lib/openc_bot/incrementers.rb +2 -0
- data/lib/openc_bot/incrementers/base.rb +214 -0
- data/lib/openc_bot/incrementers/common.rb +47 -0
- data/lib/openc_bot/tasks.rb +385 -0
- data/lib/openc_bot/templates/README.md +35 -0
- data/lib/openc_bot/templates/bin/export_data +28 -0
- data/lib/openc_bot/templates/bin/fetch_data +23 -0
- data/lib/openc_bot/templates/bin/verify_data +1 -0
- data/lib/openc_bot/templates/config.yml +21 -0
- data/lib/openc_bot/templates/lib/bot.rb +43 -0
- data/lib/openc_bot/templates/lib/company_fetcher_bot.rb +95 -0
- data/lib/openc_bot/templates/lib/simple_bot.rb +67 -0
- data/lib/openc_bot/templates/spec/bot_spec.rb +11 -0
- data/lib/openc_bot/templates/spec/simple_bot_spec.rb +11 -0
- data/lib/openc_bot/templates/spec/spec_helper.rb +13 -0
- data/lib/openc_bot/version.rb +3 -0
- data/lib/simple_openc_bot.rb +289 -0
- data/openc_bot.gemspec +35 -0
- data/schemas/company-schema.json +112 -0
- data/schemas/includes/address.json +23 -0
- data/schemas/includes/base-statement.json +27 -0
- data/schemas/includes/company.json +14 -0
- data/schemas/includes/filing.json +20 -0
- data/schemas/includes/license-data.json +27 -0
- data/schemas/includes/officer.json +14 -0
- data/schemas/includes/previous_name.json +11 -0
- data/schemas/includes/share-parcel-data.json +67 -0
- data/schemas/includes/share-parcel.json +60 -0
- data/schemas/includes/subsidiary-relationship-data.json +52 -0
- data/schemas/includes/total-shares.json +10 -0
- data/schemas/licence-schema.json +21 -0
- data/schemas/share-parcel-schema.json +21 -0
- data/schemas/subsidiary-relationship-schema.json +19 -0
- data/spec/dummy_classes/foo_bot.rb +4 -0
- data/spec/lib/bot_data_validator_spec.rb +69 -0
- data/spec/lib/company_fetcher_bot_spec.rb +93 -0
- data/spec/lib/exceptions_spec.rb +25 -0
- data/spec/lib/helpers/alpha_search_spec.rb +173 -0
- data/spec/lib/helpers/dates_spec.rb +65 -0
- data/spec/lib/helpers/incremental_search_spec.rb +471 -0
- data/spec/lib/helpers/register_methods_spec.rb +558 -0
- data/spec/lib/helpers/text_spec.rb +50 -0
- data/spec/lib/openc_bot/db/.gitkeep +0 -0
- data/spec/lib/openc_bot/incrementers/common_spec.rb +83 -0
- data/spec/lib/openc_bot_spec.rb +116 -0
- data/spec/schemas/company-schema_spec.rb +676 -0
- data/spec/simple_openc_bot_spec.rb +302 -0
- data/spec/spec_helper.rb +19 -0
- metadata +300 -0
@@ -0,0 +1,205 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'json-schema'
|
3
|
+
require 'active_support/core_ext'
|
4
|
+
|
5
|
+
module OpencBot
|
6
|
+
module Helpers
|
7
|
+
module RegisterMethods
  # Helper methods for bots that mirror a register into the local +ocdata+
  # table. The methods call +const_defined?+/+const_get+ on +self+, so they
  # are written to be used via +extend+ on a bot module. They also rely on
  # methods that are not defined in this module (+select+, +insert_or_update+,
  # +sqlite_magic_connection+, +save_run_report+, +fetch_datum+,
  # +process_datum+, +fetch_data_via_alpha_search+,
  # +fetch_data_via_incremental_search+); the extending module must supply them.

  # Returns the value of the USE_ALPHA_SEARCH constant when it is defined,
  # otherwise false.
  def use_alpha_search
    self.const_defined?('USE_ALPHA_SEARCH') && self.const_get('USE_ALPHA_SEARCH')
  end

  # Returns true if a row whose primary key equals +uid+ exists in ocdata.
  def datum_exists?(uid)
    !!select("ocdata.#{primary_key_name} FROM ocdata WHERE #{primary_key_name} = ? LIMIT 1", uid).first
  end

  # Fetches and saves data. By default assumes an incremental search, or an
  # alpha search if USE_ALPHA_SEARCH is set. Override this method for a
  # different type of data import, e.g. from a CSV file.
  def fetch_data
    if use_alpha_search
      fetch_data_via_alpha_search
    else
      fetch_data_via_incremental_search
    end
  end

  # Yields every row of ocdata after passing it through +post_process+ with
  # nil values skipped.
  def export_data
    select("ocdata.* from ocdata").each do |res|
      yield post_process(res, true)
    end
  end

  # Returns the body of the registry page for +company_number+.
  def fetch_registry_page(company_number)
    _client.get_content(registry_url(company_number))
  end

  # Serialises +all_data+ for storage and upserts it keyed on the primary key.
  # +options+ is accepted for interface compatibility but is not used here.
  def prepare_and_save_data(all_data,options={})
    data_to_be_saved = prepare_for_saving(all_data)
    insert_or_update([primary_key_name], data_to_be_saved)
  end

  # Name of the primary key column: the PRIMARY_KEY_NAME constant when
  # defined, otherwise :uid.
  def primary_key_name
    self.const_defined?('PRIMARY_KEY_NAME') ? self.const_get('PRIMARY_KEY_NAME') : :uid
  end

  # Sensible default. Either uses computed version or registry_url in db.
  def registry_url(uid)
    computed_registry_url(uid) || registry_url_from_db(uid)
  end

  # Stub method (returns nil). Override in including module if this can be
  # computed from uid.
  def computed_registry_url(uid)
  end

  # Stub method (returns nil). Override in including module if this can be
  # pulled from db (i.e. it is stored there).
  def registry_url_from_db(uid)
  end

  # Validates +entity_datum+ (minus its :data entry) and saves it when there
  # are no validation errors. Returns nil without saving when invalid.
  def save_entity(entity_datum)
    validation_errors = validate_datum(entity_datum.except(:data))
    return unless validation_errors.blank?
    prepare_and_save_data(entity_datum)
  end

  # Behaves like +save_entity+ but raises RecordInvalid exception if
  # record is not valid (validation errors are available in the
  # exception's +validation_errors+ method).
  def save_entity!(entity_datum)
    validation_errors = validate_datum(entity_datum.except(:data))
    raise OpencBot::RecordInvalid.new(validation_errors) unless validation_errors.blank?
    prepare_and_save_data(entity_datum)
  end

  # Name of the JSON schema used by +validate_datum+: the SCHEMA_NAME constant
  # when defined, otherwise nil.
  def schema_name
    self.const_defined?('SCHEMA_NAME') ? self.const_get('SCHEMA_NAME') : nil
  end

  # Yields the primary key of each row whose +retrieved_at+ is NULL or more
  # than 30 days old, up to +stale_count+ rows (default 1000).
  #
  # If the query fails because the +retrieved_at+ column does not exist, the
  # column is added and the query is retried once. The retry is bounded so a
  # persistent failure is raised instead of looping forever.
  def stale_entry_uids(stale_count=nil)
    stale_count ||= 1000
    sql_query = "ocdata.* from ocdata WHERE retrieved_at IS NULL OR strftime('%s', retrieved_at) < strftime('%s', '#{Date.today - 30}') LIMIT #{stale_count.to_i}"
    column_added = false
    begin
      select(sql_query).each do |res|
        yield res[primary_key_name.to_s]
      end
    rescue SQLite3::SQLException => e
      raise if column_added || !e.message[/no such column: retrieved_at/]
      sqlite_magic_connection.add_columns('ocdata', ['retrieved_at'])
      column_added = true
      retry
    end
  end

  # Runs a full update: fetches new data, refreshes stale entries, then saves
  # a successful run report. +options+ is not used here.
  def update_data(options={})
    fetch_data
    update_stale
    save_run_report(:status => 'success')
  end

  # This method updates a datum given by a uid (e.g. a company_number), by
  # fetching new data, processing it and then saving it. It assumes the
  # methods for doing this (#fetch_datum and #process_datum) are implemented
  # in the module that includes this method.
  #
  # If no second argument is passed to this method, or false is passed, the
  # method will return the processed data hash.
  # If true is passed as the second argument, the method will output the
  # updated result as json to STDOUT, which can then be consumed by, say,
  # something which triggered this method, for example if it was called by
  # a rake task, which in turn might have been called by the main
  # OpenCorporates application.
  #
  # Errors raised while fetching/processing/saving are reported as json when
  # +output_as_json+ is true and otherwise swallowed (the method returns nil).
  # Only StandardError and ScriptError are handled this way, so interrupts,
  # exit requests and out-of-memory conditions still propagate to the caller.
  #
  # +replace_existing_data+ is accepted but not used by this implementation.
  def update_datum(uid, output_as_json=false,replace_existing_data=false)
    return unless (raw_data = fetch_datum(uid))
    default_options = {primary_key_name => uid, :retrieved_at => Time.now}
    processed_data = default_options.merge(process_datum(raw_data))
    # prepare the data for saving (converting Arrays, Hashes to json) and
    # save the original data too, as we may not be extracting everything from it yet
    save_entity(processed_data.merge(:data => raw_data))
    if output_as_json
      puts processed_data.to_json
    else
      processed_data
    end
  rescue StandardError, ScriptError => e
    output_json_error_message(e) if output_as_json
  end

  # Calls +update_datum+ for every uid yielded by +stale_entry_uids+.
  def update_stale(stale_count=nil)
    stale_entry_uids(stale_count) do |stale_entry_uid|
      update_datum(stale_entry_uid)
    end
  end

  # Validates +record+ (serialised to json) against the schema file named by
  # +schema_name+ and returns the list of validation errors as objects.
  def validate_datum(record)
    schema = File.expand_path("../../../../schemas/#{schema_name}.json", __FILE__)
    JSON::Validator.fully_validate(
      schema,
      record.to_json,
      {:errors_as_objects => true})
  end

  # Converts a stored row back to ruby objects, dropping the :data entry.
  # NOTE(review): only the symbol key :data is removed; if rows come back with
  # string keys the raw 'data' value is kept - confirm against the caller.
  def post_process(row_hash, skip_nulls=false)
    # many of the fields will be serialized json and so we convert to ruby objects
    convert_json_to_ruby(row_hash.except(:data), skip_nulls)
  end

  private
  # This is a utility method for outputting an error message as json to STDOUT
  # (which can then be handled by the importer)
  def output_json_error_message(err_obj)
    err_msg = {'error' => {'klass' => err_obj.class.to_s, 'message' => err_obj.message, 'backtrace' => err_obj.backtrace}}
    puts err_msg.to_json
  end

  # Returns a deep copy of +raw_data_hash+ in which every Array/Hash value is
  # serialised to json and every Date/Time/DateTime value to an ISO 8601
  # string, so that it can be saved as a string in sqlite. The argument itself
  # is left untouched.
  def prepare_for_saving(raw_data_hash)
    prepared_data = deep_clone_hash(raw_data_hash)
    prepared_data.each do |k,v|
      case v
      when Array, Hash
        prepared_data[k] = v.to_json
      when Date, Time, DateTime
        prepared_data[k] = v.iso8601
      end
    end
    prepared_data
  end

  # Returns the memoised HTTPClient, building it from +options+ on first use
  # (:proxy, :skip_ssl_verification, :user_agent, :ssl_version,
  # :ssl_certificate). Options passed on later calls are ignored because the
  # client is already built. The caller's hash is only read, never modified.
  def _client(options={})
    return @client if @client
    @client = HTTPClient.new(options[:proxy])
    @client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE if options[:skip_ssl_verification]
    @client.agent_name = options[:user_agent]
    @client.ssl_config.ssl_version = options[:ssl_version] if options[:ssl_version]
    if (ssl_certificate = options[:ssl_certificate])
      @client.ssl_config.add_trust_ca(ssl_certificate)
    end
    @client
  end

  # Deep copy via a Marshal round trip.
  def deep_clone_hash(given_hash)
    Marshal.load( Marshal.dump(given_hash) )
  end

  # Replaces, in place, every String value of +data_hash+ that looks like a
  # json object/array with the parsed ruby object (hashes get indifferent
  # access). Values that fail to parse are left as they are. When +skip_nulls+
  # is true, entries whose value is nil are removed. Returns +data_hash+.
  def convert_json_to_ruby(data_hash, skip_nulls=false)
    # iterate over a snapshot of the keys because entries may be deleted below
    data_hash.keys.each do |k|
      v = data_hash[k]
      parsed_data = begin
        JSON.parse(v) if v.is_a?(String) && v[/^[\{\[]+\"|^\[\]$|^{}$/]
      rescue StandardError
        nil
      end
      case parsed_data
      when Hash
        parsed_data = parsed_data.with_indifferent_access
      when Array
        parsed_data.collect!{ |e| e.is_a?(Hash) ? e.with_indifferent_access : e }
      end
      if skip_nulls && v.nil?
        data_hash.delete(k)
      else
        data_hash[k] = parsed_data if parsed_data
      end
    end
    data_hash
  end

end
|
204
|
+
end
|
205
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
module OpencBot
|
3
|
+
module Helpers
|
4
|
+
module Text
  # Whitespace clean-up helpers; callable directly on the module
  # (Text.strip_all_spaces) or mixed in.
  extend self

  # Replaces every UTF-8 non-breaking space in +raw_text+ with a plain space.
  # A nil/false argument is returned unchanged.
  def normalise_utf8_spaces(raw_text)
    return raw_text unless raw_text
    raw_text.gsub(/\xC2\xA0/, ' ')
  end

  # Normalises non-breaking spaces, trims both ends and collapses each run of
  # whitespace into a single space. A nil/false argument is returned unchanged.
  def strip_all_spaces(text)
    return text unless text
    trimmed = normalise_utf8_spaces(text).strip
    trimmed.gsub(/\s+/,' ')
  end
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,214 @@
|
|
1
|
+
require 'backports/2.0.0/enumerable/lazy'
|
2
|
+
require 'json'
|
3
|
+
module OpencBot
|
4
|
+
class BaseIncrementer
  # Base class for resumable iterators. Subclasses implement
  # +increment_yielder+, which yields each value in turn. The current position
  # is persisted to a text file under <app_path>/db so an interrupted run can
  # be resumed with +resumable+.

  # name - identifier used in progress output and in the position file name.
  # opts - :expected_count, :app_path, :show_progress (defaults to true when
  #        not given), :reset_iterator, :max_iterations.
  def initialize(name, opts={})
    @name = name
    @expected_count = opts[:expected_count]
    @count = 0
    @app_path = opts[:app_path]
    @show_progress = opts[:show_progress].nil? ? true : opts[:show_progress]
    @reset_iterator = opts[:reset_iterator]
    @max_iterations = opts[:max_iterations]
    @opts = opts
  end

  # Wraps construction so that, when no :app_path option is supplied, it
  # defaults to the parent of the directory containing the file that called
  # +new+. An options hash is appended when only a name is given.
  def self.new(*args)
    path, = caller[0].partition(":")
    path = File.expand_path(File.join(File.dirname(path), ".."))
    args << {} if args.count == 1
    args[1][:app_path] = path if !args[1][:app_path]
    super(*args)
  end

  # Prints the progress line when progress output is enabled.
  def log_progress(percent)
    puts "Iterator #{@name} progress: " + (percent.to_s + "%") if @show_progress
  end

  # Percentage of the expected count processed so far, rounded to two decimal
  # places; nil when no expected count is known.
  def progress_percent
    (@count.to_f / @expected_count * 100).round(2) if @expected_count
  end

  # Returns a lazy enumerator over the values produced by +increment_yielder+.
  # The position is written to disk BEFORE each value is handed to the
  # consumer, so the value being processed when a run is interrupted is
  # processed again on resume. The position is cleared once the iteration
  # runs to completion.
  def each
    Enumerator.new do |yielder|
      increment_yielder do |result|
        write_current(position_marker(result))
        yielder.yield(result)
        @count += 1
        log_progress(progress_percent)
      end
      reset_current
    end.lazy
  end

  # Like +each+, but skips forward to the saved position (unless
  # :reset_iterator was given) and stops after :max_iterations values when
  # that option was given.
  def resumable
    enum = each
    enum = resuming_enum(enum) unless @reset_iterator
    enum = enum.take(@max_iterations) if @max_iterations
    enum
  end

  # Returns +enum+ with every value before the saved position dropped.
  # Progress output is suppressed while skipping and restored once the saved
  # position is reached. When there is no saved position +enum+ is returned
  # untouched and the progress setting is left as configured.
  def resuming_enum(enum)
    start_from = read_current
    return enum if start_from.nil? || start_from == ""
    preset_show_progress = @show_progress
    @show_progress = false
    enum.drop_while do |x|
      # compare using the same representation that +each+ wrote to disk
      found_start_point = (position_marker(x).to_s == start_from)
      @show_progress = preset_show_progress && found_start_point
      !found_start_point
    end
  end

  # Path of the file holding the saved position.
  def position_file_name
    "#{@app_path}/db/#{db_name}-iterator-position.txt"
  end

  def db_name
    @name
  end

  # Clears the saved position.
  # This is done with a file, rather than SQL, for speed reasons.
  def reset_current
    File.open(position_file_name, "w") do |f|
      f.write("")
    end
  end

  # Saves +val+ (as a string) as the current position.
  def write_current(val)
    File.open(position_file_name, "w") do |f|
      f.write(val.to_s)
    end
  end

  # Returns the saved position, or nil when the position file does not exist.
  def read_current
    File.read(position_file_name)
  rescue Errno::ENOENT
    nil
  end

  private

  # Representation of +result+ stored in the position file: json for hashes,
  # the value itself (later stringified) for everything else.
  def position_marker(result)
    result.is_a?(Hash) ? result.to_json : result
  end
end
|
102
|
+
|
103
|
+
class ManualIncrementer < OpencBot::BaseIncrementer
  # Incrementer whose values are rows stored by the caller (via +add_row+) in
  # an "items" table of a per-incrementer sqlite database.

  include ScraperWiki

  ITEMS_TABLE = "items"

  # Runs the given block inside a single database transaction. The
  # transaction is committed when the block returns and rolled back (with the
  # error re-raised) when the block raises, so a failure never leaves a
  # transaction open on the connection.
  def single_transaction
    sqlite_magic_connection.execute("BEGIN TRANSACTION")
    begin
      yield(self)
    rescue Exception
      sqlite_magic_connection.execute("ROLLBACK")
      raise
    end
    sqlite_magic_connection.execute("COMMIT")
  end

  # Requires opts[:fields]; creates the items table (the given fields plus an
  # integer primary key +_id+) and a unique index over the fields if they do
  # not already exist.
  def initialize(name, opts={})
    super(name, opts)
    raise "Fields must be defined for this Record" if opts[:fields].nil?
    query = "CREATE TABLE IF NOT EXISTS #{ITEMS_TABLE} (#{opts[:fields].join(',')}, _id INTEGER PRIMARY KEY)"
    sqlite_magic_connection.execute query
    query = "CREATE UNIQUE INDEX IF NOT EXISTS #{opts[:fields].join('_')} " +
            "ON #{ITEMS_TABLE} (#{opts[:fields].join(',')})"
    sqlite_magic_connection.execute query
  end

  # Override default in ScraperWiki gem: memoised connection to
  # <app_path>/db/<db_name>.db
  def sqlite_magic_connection
    @sqlite_magic_connection ||= SqliteMagic::Connection.new(
      File.expand_path(File.join(@app_path, 'db', "#{db_name}.db")))
  end

  # Yields every stored row in batches, starting at the +_id+ of +start_row+
  # when one is given. Also refreshes the expected and processed counts used
  # for progress reporting.
  def increment_yielder(start_row=nil)
    start_id = start_row && start_row["_id"].to_i
    @expected_count = count_all_items
    @count = count_processed_items(start_id)
    loop do
      batch = read_batch(start_id)
      break if batch.empty?
      batch.each do |row|
        yield row
        # the next batch starts just after the last row handed out
        start_id = row["_id"].to_i + 1
      end
    end
  end

  # True when the "populated" flag stored in the misc table is "true".
  # Returns nil when the table does not exist or holds no row.
  def populated
    row = select("populated FROM misc").first
    result = row && row['populated']
    result && result == "true"
  rescue SqliteMagic::NoSuchTable
    nil
  end

  # Stores the populated flag. Only true / "true" are persisted; any other
  # value is ignored.
  def populated=(val)
    if val && val == "true" || val == true
      save_sqlite([:populated], {:populated => "true"}, "misc")
    end
  end

  # Marks the incrementer as populated and returns the lazy enumerator from
  # +each+. Arguments are accepted but ignored.
  def enum(*args)
    self.populated = true
    each
  end

  # Inserts +val+ (a hash of column => value) into the items table, or updates
  # the existing row.
  def add_row(val)
    sqlite_magic_connection.insert_or_update(
      val.keys, val, ITEMS_TABLE, :update_unique_keys => true)
  end

  # Number of rows with an +_id+ lower than +start_id+; 0 when +start_id+ is
  # nil or the table does not exist.
  def count_processed_items(start_id)
    return 0 unless start_id
    begin
      result = select("count(*) as count FROM #{ITEMS_TABLE} WHERE _id < #{start_id}").first
      result && result['count']
    rescue SqliteMagic::NoSuchTable
      0
    end
  end

  # Total number of stored rows; nil when the table does not exist.
  def count_all_items
    select("count(*) as count FROM #{ITEMS_TABLE}").first['count']
  rescue SqliteMagic::NoSuchTable
    nil
  end

  # Returns up to 100 rows with +_id+ >= +start_id+ (or from the beginning
  # when +start_id+ is nil). Rows are explicitly ordered by +_id+ because
  # +increment_yielder+ derives the next batch's starting id from the last
  # row of this one.
  def read_batch(start_id=nil)
    sql = "* FROM #{ITEMS_TABLE}"
    if start_id
      sql += " WHERE _id >= #{start_id}"
    end
    sql += " ORDER BY _id LIMIT 100"
    select(sql)
  end

  # Override superclass definition for more efficient version: instead of
  # iterating and dropping rows, restart the query at the saved row's _id.
  # Returns +enum+ unchanged when there is no saved position.
  def resuming_enum(enum)
    saved_position = read_current
    return enum if saved_position.nil? || saved_position == ""
    current_row = JSON.parse(saved_position)
    return enum unless current_row
    Enumerator.new do |yielder|
      increment_yielder(current_row) do |result|
        write_current(result.to_json)
        yielder.yield(result)
        @count += 1
        log_progress(progress_percent)
      end
      reset_current
    end.lazy
  end
end
|
214
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module OpencBot
|
2
|
+
class NumericIncrementer < OpencBot::BaseIncrementer
  # Incrementer that counts from :start_val (default 0) up to and including
  # :end_val in steps of one.

  # Raises a RuntimeError when opts[:end_val] is missing.
  def initialize(name, opts={})
    raise "You must specify an end_val for a NumericIncrementer" if ! opts[:end_val]
    @start_val = opts[:start_val] || 0
    @end_val = opts[:end_val]
    super(name, opts)
  end

  # Yields each value from the start value to the end value (inclusive).
  # The expected count is the number of values in that inclusive range, so
  # the reported progress reaches exactly 100% on the last value whatever the
  # start value is.
  def increment_yielder
    @expected_count = [@end_val - @start_val + 1, 0].max
    i = @start_val
    until i > @end_val
      yield i
      i += 1
    end
  end
end
|
22
|
+
|
23
|
+
class AsciiIncrementer < OpencBot::BaseIncrementer
  # Incrementer over every string of length :size (default 3) built from the
  # 36 characters 0-9 and a-z.

  def initialize(name, opts={})
    @size = opts[:size] || 3
    super(name, opts)
  end

  # Yields each combination as a string, e.g. "000", "001", ... "zzz" for a
  # size of 3. The expected count is computed as 36 ** size, so progress can
  # be reported for any size.
  def increment_yielder
    alnum = (0...36).map{|i|i.to_s 36} # 0...z
    @expected_count = alnum.size ** @size
    alnum.repeated_permutation(@size) do |perm|
      yield perm.join
    end
  end
end
|
47
|
+
end
|