pure-extractor 0.6.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 37ee6bb92ce3fc3dc87712ab0d570d502a3808da
4
- data.tar.gz: 3797b31f40e188eed3f64e21a9c3ced09a6e8282
3
+ metadata.gz: 0219796a63f3282dbcd658e866487dfafaf5e6e5
4
+ data.tar.gz: 690d3103961bf267a92faeb1d7587597997d32da
5
5
  SHA512:
6
- metadata.gz: 354bb45f1f841b65e8ef2bf42567a529bddd711e49da98c8cee24d32be9335d663838b11473e2f63abc87752e63145e694405f7fc1ac621edcb59297e8df0210
7
- data.tar.gz: 7574ccb6aade9d1a09da68c279dae15050faecb8cc5e33c626f09811f60eb6d2fc88f2a7cf3425c0c8085bb2680abedf5e307573a6a0738af535b01fb5a7f495
6
+ metadata.gz: cb3681cb35c6649b68865604a565a6bebaac1451fc6d42302078ba94c71c1654d50022d2d19b658bf1f70ed44d8fa3e53f8743df5b0c73c3932728f24beee314
7
+ data.tar.gz: a2604591ba5435cebe5900a05f63fee26dc57e1a6964f0958ee9a527133220fb61ef76582100a259d672b19ae35d93cadbf52f3565890a1c6c915103e80a8d59
data/.gitignore CHANGED
@@ -8,3 +8,4 @@
8
8
  /spec/reports/
9
9
  /tmp/
10
10
  *.gem
11
+ .rbenv-gemsets
@@ -5,10 +5,11 @@ module Pure
5
5
  module Commands
6
6
  class PureCommand < Clamp::Command
7
7
 
8
- option ["-o", "--output-file"], "file", "file to output to, when extracting all this is the folder to place output files", required: true
8
+ option ["-o", "--output-dir"], "output-dir", "Directory to store generated files in", required: true
9
9
  option ["-s", "--server"], "server", "Full url to Pure WS rest server", required: true
10
10
  option ["-u", "--username"], "username", "Username to connect to Pure WS"
11
11
  option ["-p", "--password"], "password", "Password to connect to Pure WS"
12
+ option ["-c", "--chunk-size"], "chunk-size", "Number of entities to extract per file, defaults to 200"
12
13
 
13
14
  end
14
15
  end
@@ -8,7 +8,7 @@ module Pure
8
8
 
9
9
  include Pure::Extractor::ConfigurePuree
10
10
 
11
- valid_extracts = [:organisation, :people, :projects, :publications, :datasets, :all]
11
+ valid_extracts = [:organisation, :people, :projects, :publications, :datasets]
12
12
 
13
13
  parameter "EXTRACT", "what to extract from pure, valid options are #{valid_extracts.map{|v| v.to_s}}" do |s|
14
14
 
@@ -27,26 +27,8 @@ module Pure
27
27
  def execute
28
28
 
29
29
  configure_puree server, username, password
30
-
31
- case extract
32
-
33
- when :all
34
-
35
- valid_extracts.each do |extract|
36
-
37
- next unless extract != :all
38
-
39
- filename = output_file + "/" + extract.to_s + ".json"
40
-
41
- Pure::Extractor.extract pure_collections[extract], filename
42
-
43
- end
44
-
45
- else
46
-
47
- Pure::Extractor.extract pure_collections[extract], output_file
48
30
 
49
- end
31
+ Pure::Extractor.extract pure_collections[extract], chunk_size, output_dir
50
32
 
51
33
  end
52
34
 
@@ -1,5 +1,5 @@
1
1
  module Pure
2
2
  module Extractor
3
- VERSION = "0.6.0"
3
+ VERSION = "1.0.0"
4
4
  end
5
5
  end
@@ -6,7 +6,7 @@ require 'ruby-progressbar'
6
6
  module Pure
7
7
  module Extractor
8
8
 
9
- def self.extract type, output_file
9
+ def self.extract type, chunk_size, output_directory
10
10
 
11
11
  collection = Puree::Collection.new resource: type
12
12
 
@@ -17,13 +17,23 @@ module Pure
17
17
  progress_bar = ProgressBar.create(format: "%a %e %b\u{15E7}%i %p%% %t", progress_mark: ' ', remainder_mark: "\u{FF65}", total: collection_count)
18
18
 
19
19
  offset = 0
20
- limit = 20
21
-
22
- results = []
20
+ file_id = 0
21
+
22
+ if chunk_size.nil? || chunk_size.empty?
23
+ chunk_size = 200
24
+ end
25
+
26
+ chunk_size = chunk_size.to_i
23
27
 
24
28
  while offset < collection_count do
29
+
30
+ file_id += 1
31
+
32
+ filename = type.to_s + "_#{file_id.to_s.rjust(6, '0')}"
33
+
34
+ output_file = output_directory + "/#{filename}.json"
25
35
 
26
- returned_collection = collection.find limit: limit, offset: offset
36
+ returned_collection = collection.find limit: chunk_size, offset: offset
27
37
 
28
38
  returned_collection.each do |item|
29
39
 
@@ -31,17 +41,15 @@ module Pure
31
41
 
32
42
  end
33
43
 
34
- results.concat(returned_collection)
44
+ formatted_results = format_results_for_type type, returned_collection
45
+
46
+ write_results_to_file formatted_results, output_file
35
47
 
36
- update_progress_bar progress_bar, limit, collection_count
48
+ update_progress_bar progress_bar, chunk_size, collection_count
37
49
 
38
- offset += limit
50
+ offset += chunk_size
39
51
 
40
52
  end
41
-
42
- formatted_results = format_results_for_type type, results
43
-
44
- write_results_to_file formatted_results, output_file, type.to_s
45
53
 
46
54
  end
47
55
 
@@ -121,9 +129,7 @@ module Pure
121
129
 
122
130
  end
123
131
 
124
- def self.write_results_to_file results, file, collection_name
125
-
126
- puts "Writing #{collection_name} to #{file}"
132
+ def self.write_results_to_file results, file
127
133
 
128
134
  File.open(file, "w") do |f|
129
135
  f.write(JSON.pretty_generate(results))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pure-extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stephen Robinson
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2016-12-09 00:00:00.000000000 Z
12
+ date: 2017-01-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: clamp