pure-extractor 0.6.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 37ee6bb92ce3fc3dc87712ab0d570d502a3808da
4
- data.tar.gz: 3797b31f40e188eed3f64e21a9c3ced09a6e8282
3
+ metadata.gz: 0219796a63f3282dbcd658e866487dfafaf5e6e5
4
+ data.tar.gz: 690d3103961bf267a92faeb1d7587597997d32da
5
5
  SHA512:
6
- metadata.gz: 354bb45f1f841b65e8ef2bf42567a529bddd711e49da98c8cee24d32be9335d663838b11473e2f63abc87752e63145e694405f7fc1ac621edcb59297e8df0210
7
- data.tar.gz: 7574ccb6aade9d1a09da68c279dae15050faecb8cc5e33c626f09811f60eb6d2fc88f2a7cf3425c0c8085bb2680abedf5e307573a6a0738af535b01fb5a7f495
6
+ metadata.gz: cb3681cb35c6649b68865604a565a6bebaac1451fc6d42302078ba94c71c1654d50022d2d19b658bf1f70ed44d8fa3e53f8743df5b0c73c3932728f24beee314
7
+ data.tar.gz: a2604591ba5435cebe5900a05f63fee26dc57e1a6964f0958ee9a527133220fb61ef76582100a259d672b19ae35d93cadbf52f3565890a1c6c915103e80a8d59
data/.gitignore CHANGED
@@ -8,3 +8,4 @@
8
8
  /spec/reports/
9
9
  /tmp/
10
10
  *.gem
11
+ .rbenv-gemsets
@@ -5,10 +5,11 @@ module Pure
5
5
  module Commands
6
6
  class PureCommand < Clamp::Command
7
7
 
8
- option ["-o", "--output-file"], "file", "file to output to, when extracting all this is the folder to place output files", required: true
8
+ option ["-o", "--output-dir"], "output-dir", "Directory to store generated files in", required: true
9
9
  option ["-s", "--server"], "server", "Full url to Pure WS rest server", required: true
10
10
  option ["-u", "--username"], "username", "Username to connect to Pure WS"
11
11
  option ["-p", "--password"], "password", "Password to connect to Pure WS"
12
+ option ["-c", "--chunk-size"], "chunk-size", "Number of entities to extract per file, defaults to 200"
12
13
 
13
14
  end
14
15
  end
@@ -8,7 +8,7 @@ module Pure
8
8
 
9
9
  include Pure::Extractor::ConfigurePuree
10
10
 
11
- valid_extracts = [:organisation, :people, :projects, :publications, :datasets, :all]
11
+ valid_extracts = [:organisation, :people, :projects, :publications, :datasets]
12
12
 
13
13
  parameter "EXTRACT", "what to extract from pure, valid options are #{valid_extracts.map{|v| v.to_s}}" do |s|
14
14
 
@@ -27,26 +27,8 @@ module Pure
27
27
  def execute
28
28
 
29
29
  configure_puree server, username, password
30
-
31
- case extract
32
-
33
- when :all
34
-
35
- valid_extracts.each do |extract|
36
-
37
- next unless extract != :all
38
-
39
- filename = output_file + "/" + extract.to_s + ".json"
40
-
41
- Pure::Extractor.extract pure_collections[extract], filename
42
-
43
- end
44
-
45
- else
46
-
47
- Pure::Extractor.extract pure_collections[extract], output_file
48
30
 
49
- end
31
+ Pure::Extractor.extract pure_collections[extract], chunk_size, output_dir
50
32
 
51
33
  end
52
34
 
@@ -1,5 +1,5 @@
1
1
  module Pure
2
2
  module Extractor
3
- VERSION = "0.6.0"
3
+ VERSION = "1.0.0"
4
4
  end
5
5
  end
@@ -6,7 +6,7 @@ require 'ruby-progressbar'
6
6
  module Pure
7
7
  module Extractor
8
8
 
9
- def self.extract type, output_file
9
+ def self.extract type, chunk_size, output_directory
10
10
 
11
11
  collection = Puree::Collection.new resource: type
12
12
 
@@ -17,13 +17,23 @@ module Pure
17
17
  progress_bar = ProgressBar.create(format: "%a %e %b\u{15E7}%i %p%% %t", progress_mark: ' ', remainder_mark: "\u{FF65}", total: collection_count)
18
18
 
19
19
  offset = 0
20
- limit = 20
21
-
22
- results = []
20
+ file_id = 0
21
+
22
+ if chunk_size.nil? || chunk_size.empty?
23
+ chunk_size = 200
24
+ end
25
+
26
+ chunk_size = chunk_size.to_i
23
27
 
24
28
  while offset < collection_count do
29
+
30
+ file_id += 1
31
+
32
+ filename = type.to_s + "_#{file_id.to_s.rjust(6, '0')}"
33
+
34
+ output_file = output_directory + "/#{filename}.json"
25
35
 
26
- returned_collection = collection.find limit: limit, offset: offset
36
+ returned_collection = collection.find limit: chunk_size, offset: offset
27
37
 
28
38
  returned_collection.each do |item|
29
39
 
@@ -31,17 +41,15 @@ module Pure
31
41
 
32
42
  end
33
43
 
34
- results.concat(returned_collection)
44
+ formatted_results = format_results_for_type type, returned_collection
45
+
46
+ write_results_to_file formatted_results, output_file
35
47
 
36
- update_progress_bar progress_bar, limit, collection_count
48
+ update_progress_bar progress_bar, chunk_size, collection_count
37
49
 
38
- offset += limit
50
+ offset += chunk_size
39
51
 
40
52
  end
41
-
42
- formatted_results = format_results_for_type type, results
43
-
44
- write_results_to_file formatted_results, output_file, type.to_s
45
53
 
46
54
  end
47
55
 
@@ -121,9 +129,7 @@ module Pure
121
129
 
122
130
  end
123
131
 
124
- def self.write_results_to_file results, file, collection_name
125
-
126
- puts "Writing #{collection_name} to #{file}"
132
+ def self.write_results_to_file results, file
127
133
 
128
134
  File.open(file, "w") do |f|
129
135
  f.write(JSON.pretty_generate(results))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pure-extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stephen Robinson
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2016-12-09 00:00:00.000000000 Z
12
+ date: 2017-01-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: clamp