pure-extractor 0.6.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/pure/extractor/commands/pure_command.rb +2 -1
- data/lib/pure/extractor/commands/pure_extractor.rb +2 -20
- data/lib/pure/extractor/version.rb +1 -1
- data/lib/pure/extractor.rb +21 -15
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0219796a63f3282dbcd658e866487dfafaf5e6e5
|
4
|
+
data.tar.gz: 690d3103961bf267a92faeb1d7587597997d32da
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cb3681cb35c6649b68865604a565a6bebaac1451fc6d42302078ba94c71c1654d50022d2d19b658bf1f70ed44d8fa3e53f8743df5b0c73c3932728f24beee314
|
7
|
+
data.tar.gz: a2604591ba5435cebe5900a05f63fee26dc57e1a6964f0958ee9a527133220fb61ef76582100a259d672b19ae35d93cadbf52f3565890a1c6c915103e80a8d59
|
data/.gitignore
CHANGED
data/lib/pure/extractor/commands/pure_command.rb
CHANGED
@@ -5,10 +5,11 @@ module Pure
|
|
5
5
|
module Commands
|
6
6
|
class PureCommand < Clamp::Command
|
7
7
|
|
8
|
-
option ["-o", "--output-
|
8
|
+
option ["-o", "--output-dir"], "output-dir", "Directory to store generated files in", required: true
|
9
9
|
option ["-s", "--server"], "server", "Full url to Pure WS rest server", required: true
|
10
10
|
option ["-u", "--username"], "username", "Username to connect to Pure WS"
|
11
11
|
option ["-p", "--password"], "password", "Password to connect to Pure WS"
|
12
|
+
option ["-c", "--chunk-size"], "chunk-size", "Number of entities to extract per file, defaults to 200"
|
12
13
|
|
13
14
|
end
|
14
15
|
end
|
data/lib/pure/extractor/commands/pure_extractor.rb
CHANGED
@@ -8,7 +8,7 @@ module Pure
|
|
8
8
|
|
9
9
|
include Pure::Extractor::ConfigurePuree
|
10
10
|
|
11
|
-
valid_extracts = [:organisation, :people, :projects, :publications, :datasets, :all]
|
11
|
+
valid_extracts = [:organisation, :people, :projects, :publications, :datasets]
|
12
12
|
|
13
13
|
parameter "EXTRACT", "what to extract from pure, valid options are #{valid_extracts.map{|v| v.to_s}}" do |s|
|
14
14
|
|
@@ -27,26 +27,8 @@ module Pure
|
|
27
27
|
def execute
|
28
28
|
|
29
29
|
configure_puree server, username, password
|
30
|
-
|
31
|
-
case extract
|
32
|
-
|
33
|
-
when :all
|
34
|
-
|
35
|
-
valid_extracts.each do |extract|
|
36
|
-
|
37
|
-
next unless extract != :all
|
38
|
-
|
39
|
-
filename = output_file + "/" + extract.to_s + ".json"
|
40
|
-
|
41
|
-
Pure::Extractor.extract pure_collections[extract], filename
|
42
|
-
|
43
|
-
end
|
44
|
-
|
45
|
-
else
|
46
|
-
|
47
|
-
Pure::Extractor.extract pure_collections[extract], output_file
|
48
30
|
|
49
|
-
|
31
|
+
Pure::Extractor.extract pure_collections[extract], chunk_size, output_dir
|
50
32
|
|
51
33
|
end
|
52
34
|
|
data/lib/pure/extractor.rb
CHANGED
@@ -6,7 +6,7 @@ require 'ruby-progressbar'
|
|
6
6
|
module Pure
|
7
7
|
module Extractor
|
8
8
|
|
9
|
-
def self.extract type, output_file
|
9
|
+
def self.extract type, chunk_size, output_directory
|
10
10
|
|
11
11
|
collection = Puree::Collection.new resource: type
|
12
12
|
|
@@ -17,13 +17,23 @@ module Pure
|
|
17
17
|
progress_bar = ProgressBar.create(format: "%a %e %b\u{15E7}%i %p%% %t", progress_mark: ' ', remainder_mark: "\u{FF65}", total: collection_count)
|
18
18
|
|
19
19
|
offset = 0
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
file_id = 0
|
21
|
+
|
22
|
+
if chunk_size.nil? || chunk_size.empty?
|
23
|
+
chunk_size = 200
|
24
|
+
end
|
25
|
+
|
26
|
+
chunk_size = chunk_size.to_i
|
23
27
|
|
24
28
|
while offset < collection_count do
|
29
|
+
|
30
|
+
file_id += 1
|
31
|
+
|
32
|
+
filename = type.to_s + "_#{file_id.to_s.rjust(6, '0')}"
|
33
|
+
|
34
|
+
output_file = output_directory + "/#{filename}.json"
|
25
35
|
|
26
|
-
returned_collection = collection.find limit:
|
36
|
+
returned_collection = collection.find limit: chunk_size, offset: offset
|
27
37
|
|
28
38
|
returned_collection.each do |item|
|
29
39
|
|
@@ -31,17 +41,15 @@ module Pure
|
|
31
41
|
|
32
42
|
end
|
33
43
|
|
34
|
-
|
44
|
+
formatted_results = format_results_for_type type, returned_collection
|
45
|
+
|
46
|
+
write_results_to_file formatted_results, output_file
|
35
47
|
|
36
|
-
update_progress_bar progress_bar,
|
48
|
+
update_progress_bar progress_bar, chunk_size, collection_count
|
37
49
|
|
38
|
-
offset +=
|
50
|
+
offset += chunk_size
|
39
51
|
|
40
52
|
end
|
41
|
-
|
42
|
-
formatted_results = format_results_for_type type, results
|
43
|
-
|
44
|
-
write_results_to_file formatted_results, output_file, type.to_s
|
45
53
|
|
46
54
|
end
|
47
55
|
|
@@ -121,9 +129,7 @@ module Pure
|
|
121
129
|
|
122
130
|
end
|
123
131
|
|
124
|
-
def self.write_results_to_file results, file, collection_name
|
125
|
-
|
126
|
-
puts "Writing #{collection_name} to #{file}"
|
132
|
+
def self.write_results_to_file results, file
|
127
133
|
|
128
134
|
File.open(file, "w") do |f|
|
129
135
|
f.write(JSON.pretty_generate(results))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pure-extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stephen Robinson
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2017-01-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: clamp
|