heydan 0.1.3.4 → 0.1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3260ed35f30821ea42bb12967484f2993dcc74c7
4
- data.tar.gz: 37b1fd894a085733cd55b925b56e6c1ea908c618
3
+ metadata.gz: 5eaa35cb9a73592da90cf0b1258f71e6db629fdb
4
+ data.tar.gz: c2b2d3d5354d8fe3a84dc0aedef4f88fff210f38
5
5
  SHA512:
6
- metadata.gz: 8df0f65d8988320d296fe4022ef333cd9d546f61f27b94566a18d1d25356871fb3f9c4ae3422b5f67d4b9a4da85ef095dadf4c9d00ba6aefd4ced3b015a479d0
7
- data.tar.gz: 372b5c89a7f9a3476ebd7503430496d3bf6002abea2fa7127a464b28a1025964c790b9ff91c86d14de65d90bc33f1d86ce5a1184c428167b69810e15a39147a7
6
+ metadata.gz: 3ab0c4108d611ea899ae93c8e5ea90be371a5d0970cd9b5ed7aabb0867720f3c4778592434e2aad037b157a8f10b65abb2fcc9962f450485136c6e4208136309
7
+ data.tar.gz: 8ea5c19c13da2352f32b7cc17befb0f99176ecc830374dbfae970ed39ec40ca1fd3b7a774208a000d1b15c8247ecbf78605bdacf5da3b3ec510b6a716735c432
@@ -41,6 +41,7 @@ Gem::Specification.new do |spec|
41
41
  spec.add_dependency "spreadsheet", "~>1.0"
42
42
  spec.add_dependency "dbf", "~> 2.0"
43
43
  spec.add_dependency "rubyXL", "~> 3.3"
44
+ spec.add_dependency "parallel"
44
45
 
45
46
 
46
47
  #development/test dependencies
@@ -52,10 +52,13 @@ module HeyDan
52
52
  HeyDan.options = options
53
53
  HeyDan::OpenCivicIdentifiers.build
54
54
  end
55
- desc 'import', "Imports files into elasticsearch"
56
- def import()
55
+
56
+ desc 'import NUMBER', "Imports files into elasticsearch, set the number to lower than 100 if the files are large"
57
+ option :parallel, type: :boolean
58
+ def import(number=100)
57
59
  HeyDan::helper_text('import')
58
- HeyDan::Import.process
60
+ options[:parallel] ? HeyDan::Import.process_in_parallel(number) : HeyDan::Import.process(number)
61
+
59
62
  end
60
63
 
61
64
  desc "sources SUBCOMMAND ...ARGS", "manage sources"
@@ -1,5 +1,6 @@
1
1
  require 'ruby-progressbar'
2
2
  require 'elasticsearch'
3
+ require 'parallel'
3
4
 
4
5
  class HeyDan::Import
5
6
  attr_accessor :client
@@ -21,16 +22,16 @@ class HeyDan::Import
21
22
  client.indices.create index: index
22
23
  end
23
24
 
24
- def process
25
+ def process(number=100)
25
26
  create_index unless check_index
26
27
  total = Dir.glob("#{HeyDan.folders[:jurisdictions]}/*").size
27
28
  files= Dir.glob("#{HeyDan.folders[:jurisdictions]}/*")
28
29
  a=0
29
- b=1000
30
+ b=number
30
31
  progress = ProgressBar.create(:title => "Importing #{files.size} jurisdictions into Elastic Search", :starting_at => a, :total => files.size)
31
32
  while true do
32
33
  @bulk = []
33
- b=( files.size - b < 1000 ? -1 : a + 1000)
34
+ b=( files.size - b < number ? -1 : a + number)
34
35
  files[a..b].each do |file|
35
36
  jf = HeyDan::JurisdictionFile.new(name: file)
36
37
  @bulk << { index: { _index: 'jurisdictions', _type: jf.type, _id: jf.hash_id, data: jf.get_json } }
@@ -45,5 +46,17 @@ class HeyDan::Import
45
46
  end
46
47
  end
47
48
  end
49
+
50
+ def process_in_parallel(number=100)
51
+ create_index unless check_index
52
+ total = Dir.glob("#{HeyDan.folders[:jurisdictions]}/*").size
53
+ files= Dir.glob("#{HeyDan.folders[:jurisdictions]}/*")
54
+ results = Parallel.map(files.each_slice(number).to_a) do |chunk|
55
+ chunk.each do |file|
56
+ jf = HeyDan::JurisdictionFile.new(name: file)
57
+ @client.index index: 'jurisdictions', type: jf.type, id: jf.hash_id, body: jf.get_json
58
+ end
59
+ end
60
+ end
48
61
  end
49
62
  end
@@ -1,3 +1,3 @@
1
1
  module HeyDan
2
- VERSION = "0.1.3.4"
2
+ VERSION = "0.1.3.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: heydan
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3.4
4
+ version: 0.1.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dan Melton
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-16 00:00:00.000000000 Z
11
+ date: 2015-09-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -192,6 +192,20 @@ dependencies:
192
192
  - - "~>"
193
193
  - !ruby/object:Gem::Version
194
194
  version: '3.3'
195
+ - !ruby/object:Gem::Dependency
196
+ name: parallel
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ type: :runtime
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
195
209
  - !ruby/object:Gem::Dependency
196
210
  name: bundler
197
211
  requirement: !ruby/object:Gem::Requirement