pure-extractor 0.6.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/pure/extractor/commands/pure_command.rb +2 -1
- data/lib/pure/extractor/commands/pure_extractor.rb +2 -20
- data/lib/pure/extractor/version.rb +1 -1
- data/lib/pure/extractor.rb +21 -15
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0219796a63f3282dbcd658e866487dfafaf5e6e5
|
4
|
+
data.tar.gz: 690d3103961bf267a92faeb1d7587597997d32da
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cb3681cb35c6649b68865604a565a6bebaac1451fc6d42302078ba94c71c1654d50022d2d19b658bf1f70ed44d8fa3e53f8743df5b0c73c3932728f24beee314
|
7
|
+
data.tar.gz: a2604591ba5435cebe5900a05f63fee26dc57e1a6964f0958ee9a527133220fb61ef76582100a259d672b19ae35d93cadbf52f3565890a1c6c915103e80a8d59
|
data/.gitignore
CHANGED
data/lib/pure/extractor/commands/pure_command.rb
CHANGED
@@ -5,10 +5,11 @@ module Pure
|
|
5
5
|
module Commands
|
6
6
|
class PureCommand < Clamp::Command
|
7
7
|
|
8
|
-
option ["-o", "--output-
|
8
|
+
option ["-o", "--output-dir"], "output-dir", "Directory to store generated files in", required: true
|
9
9
|
option ["-s", "--server"], "server", "Full url to Pure WS rest server", required: true
|
10
10
|
option ["-u", "--username"], "username", "Username to connect to Pure WS"
|
11
11
|
option ["-p", "--password"], "password", "Password to connect to Pure WS"
|
12
|
+
option ["-c", "--chunk-size"], "chunk-size", "Number of entities to extract per file, defaults to 200"
|
12
13
|
|
13
14
|
end
|
14
15
|
end
|
data/lib/pure/extractor/commands/pure_extractor.rb
CHANGED
@@ -8,7 +8,7 @@ module Pure
|
|
8
8
|
|
9
9
|
include Pure::Extractor::ConfigurePuree
|
10
10
|
|
11
|
-
valid_extracts = [:organisation, :people, :projects, :publications, :datasets
|
11
|
+
valid_extracts = [:organisation, :people, :projects, :publications, :datasets]
|
12
12
|
|
13
13
|
parameter "EXTRACT", "what to extract from pure, valid options are #{valid_extracts.map{|v| v.to_s}}" do |s|
|
14
14
|
|
@@ -27,26 +27,8 @@ module Pure
|
|
27
27
|
def execute
|
28
28
|
|
29
29
|
configure_puree server, username, password
|
30
|
-
|
31
|
-
case extract
|
32
|
-
|
33
|
-
when :all
|
34
|
-
|
35
|
-
valid_extracts.each do |extract|
|
36
|
-
|
37
|
-
next unless extract != :all
|
38
|
-
|
39
|
-
filename = output_file + "/" + extract.to_s + ".json"
|
40
|
-
|
41
|
-
Pure::Extractor.extract pure_collections[extract], filename
|
42
|
-
|
43
|
-
end
|
44
|
-
|
45
|
-
else
|
46
|
-
|
47
|
-
Pure::Extractor.extract pure_collections[extract], output_file
|
48
30
|
|
49
|
-
|
31
|
+
Pure::Extractor.extract pure_collections[extract], chunk_size, output_dir
|
50
32
|
|
51
33
|
end
|
52
34
|
|
data/lib/pure/extractor.rb
CHANGED
@@ -6,7 +6,7 @@ require 'ruby-progressbar'
|
|
6
6
|
module Pure
|
7
7
|
module Extractor
|
8
8
|
|
9
|
-
def self.extract type,
|
9
|
+
def self.extract type, chunk_size, output_directory
|
10
10
|
|
11
11
|
collection = Puree::Collection.new resource: type
|
12
12
|
|
@@ -17,13 +17,23 @@ module Pure
|
|
17
17
|
progress_bar = ProgressBar.create(format: "%a %e %b\u{15E7}%i %p%% %t", progress_mark: ' ', remainder_mark: "\u{FF65}", total: collection_count)
|
18
18
|
|
19
19
|
offset = 0
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
file_id = 0
|
21
|
+
|
22
|
+
if chunk_size.nil? || chunk_size.empty?
|
23
|
+
chunk_size = 200
|
24
|
+
end
|
25
|
+
|
26
|
+
chunk_size = chunk_size.to_i
|
23
27
|
|
24
28
|
while offset < collection_count do
|
29
|
+
|
30
|
+
file_id += 1
|
31
|
+
|
32
|
+
filename = type.to_s + "_#{file_id.to_s.rjust(6, '0')}"
|
33
|
+
|
34
|
+
output_file = output_directory + "/#{filename}.json"
|
25
35
|
|
26
|
-
returned_collection = collection.find limit:
|
36
|
+
returned_collection = collection.find limit: chunk_size, offset: offset
|
27
37
|
|
28
38
|
returned_collection.each do |item|
|
29
39
|
|
@@ -31,17 +41,15 @@ module Pure
|
|
31
41
|
|
32
42
|
end
|
33
43
|
|
34
|
-
|
44
|
+
formatted_results = format_results_for_type type, returned_collection
|
45
|
+
|
46
|
+
write_results_to_file formatted_results, output_file
|
35
47
|
|
36
|
-
update_progress_bar progress_bar,
|
48
|
+
update_progress_bar progress_bar, chunk_size, collection_count
|
37
49
|
|
38
|
-
offset +=
|
50
|
+
offset += chunk_size
|
39
51
|
|
40
52
|
end
|
41
|
-
|
42
|
-
formatted_results = format_results_for_type type, results
|
43
|
-
|
44
|
-
write_results_to_file formatted_results, output_file, type.to_s
|
45
53
|
|
46
54
|
end
|
47
55
|
|
@@ -121,9 +129,7 @@ module Pure
|
|
121
129
|
|
122
130
|
end
|
123
131
|
|
124
|
-
def self.write_results_to_file results, file
|
125
|
-
|
126
|
-
puts "Writing #{collection_name} to #{file}"
|
132
|
+
def self.write_results_to_file results, file
|
127
133
|
|
128
134
|
File.open(file, "w") do |f|
|
129
135
|
f.write(JSON.pretty_generate(results))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pure-extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stephen Robinson
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2017-01-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: clamp
|