pure-extractor 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/pure/extractor/commands/pure_command.rb +3 -3
- data/lib/pure/extractor/commands/pure_extractor.rb +21 -5
- data/lib/pure/extractor/configure_puree.rb +1 -1
- data/lib/pure/extractor/version.rb +1 -1
- data/lib/pure/extractor.rb +84 -3
- data/pure-extractor.gemspec +1 -0
- metadata +15 -5
- data/exe/pure-organisation-extractor +0 -6
- data/lib/pure/extractor/commands/pure_organisation_extractor.rb +0 -22
- data/lib/pure/extractor/organisation.rb +0 -51
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 40f1c2344052bbe0bb2dd38b51ae78e0edca2138
|
4
|
+
data.tar.gz: b5f685c5147b0ae84e6aa7b71b73c1658df6a42b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72cdca316a23fe256800d219e4e5e168f8fcb0f332967c6c23f6a6f5689e0607fe67c2e9f7b9830ea98bc3a8508c3e4e58b065886f0e2baa554fdd014d4d5554
|
7
|
+
data.tar.gz: 18c0a8c121a20cbb5fafbe0ee9c5a3ce60449e0b15fa600c19bb9d86a6869897b1741532ad5476d35e98d9cbad3cf6b7b6f36cd1217bfdc2a6b9ff20b73e3b87
|
data/lib/pure/extractor/commands/pure_command.rb
CHANGED
@@ -5,10 +5,10 @@ module Pure
|
|
5
5
|
module Commands
|
6
6
|
class PureCommand < Clamp::Command
|
7
7
|
|
8
|
-
option ["-o", "--output-folder"], … [remainder of removed line truncated in extraction]
|
8
|
+
option ["-o", "--output-file"], "file", "file to output to, when extracting all this is the folder to place output files", required: true
|
9
9
|
option ["-s", "--server"], "server", "Full url to Pure WS rest server", required: true
|
10
|
-
option ["-u", "--username"], "username", "Username to connect to Pure WS"
|
11
|
-
option ["-p", "--password"], "password", "Password to connect to Pure WS"
|
10
|
+
option ["-u", "--username"], "username", "Username to connect to Pure WS"
|
11
|
+
option ["-p", "--password"], "password", "Password to connect to Pure WS"
|
12
12
|
|
13
13
|
end
|
14
14
|
end
|
data/lib/pure/extractor/commands/pure_extractor.rb
CHANGED
@@ -8,9 +8,9 @@ module Pure
|
|
8
8
|
|
9
9
|
include Pure::Extractor::ConfigurePuree
|
10
10
|
|
11
|
-
valid_extracts = [:organisation, :people]
|
11
|
+
valid_extracts = [:organisation, :people, :projects, :publications, :datasets, :all]
|
12
12
|
|
13
|
-
parameter "EXTRACT", "what to extract from pure" do |s|
|
13
|
+
parameter "EXTRACT", "what to extract from pure, valid options are #{valid_extracts.map{|v| v.to_s}}" do |s|
|
14
14
|
|
15
15
|
s = s.to_sym
|
16
16
|
|
@@ -20,15 +20,31 @@ module Pure
|
|
20
20
|
|
21
21
|
end
|
22
22
|
|
23
|
+
def pure_collections
|
24
|
+
{organisation: :organisation, people: :person, projects: :project, publications: :publication, datasets: :dataset}
|
25
|
+
end
|
26
|
+
|
23
27
|
def execute
|
24
28
|
|
25
29
|
configure_puree server, username, password
|
26
30
|
|
27
31
|
case extract
|
28
32
|
|
29
|
-
when :organisation
|
30
|
-
|
31
|
-
|
33
|
+
when :all
|
34
|
+
|
35
|
+
valid_extracts.each do |extract|
|
36
|
+
|
37
|
+
next unless extract != :all
|
38
|
+
|
39
|
+
filename = output_file + "/" + extract.to_s + ".json"
|
40
|
+
|
41
|
+
Pure::Extractor.extract pure_collections[extract], filename
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
else
|
46
|
+
|
47
|
+
Pure::Extractor.extract pure_collections[extract], output_file
|
32
48
|
|
33
49
|
end
|
34
50
|
|
data/lib/pure/extractor.rb
CHANGED
@@ -1,11 +1,92 @@
|
|
1
1
|
require "pure/extractor/version"
|
2
2
|
require "pure/extractor/configure_puree"
|
3
3
|
require "pure/extractor/commands/pure_extractor"
|
4
|
-
require "pure/extractor/commands/pure_organisation_extractor"
|
5
|
-
require "pure/extractor/organisation"
|
6
4
|
|
7
5
|
module Pure
|
8
6
|
module Extractor
|
9
|
-
|
7
|
+
|
8
|
+
def self.extract type, output_file
|
9
|
+
|
10
|
+
collection = Puree::Collection.new resource: type
|
11
|
+
|
12
|
+
collection_count = collection.find(limit: 1000000000, full: false).count
|
13
|
+
|
14
|
+
puts collection_count
|
15
|
+
|
16
|
+
progress_bar = ProgressBar.create(format: "%a %e %b\u{15E7}%i %p%% %t", progress_mark: ' ', remainder_mark: "\u{FF65}", total: collection_count)
|
17
|
+
|
18
|
+
offset = 0
|
19
|
+
limit = 20
|
20
|
+
|
21
|
+
results = []
|
22
|
+
|
23
|
+
while offset < collection_count do
|
24
|
+
|
25
|
+
returned_collection = collection.find limit: limit, offset: offset
|
26
|
+
|
27
|
+
returned_collection.each do |item|
|
28
|
+
|
29
|
+
delete_keys_for_type type, item
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
results.concat(returned_collection)
|
34
|
+
|
35
|
+
update_progress_bar progress_bar, limit, collection_count
|
36
|
+
|
37
|
+
offset += limit
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
write_results_to_file results, output_file, type.to_s
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.delete_keys_for_type type, item
|
46
|
+
|
47
|
+
keys = []
|
48
|
+
nested_keys = {}
|
49
|
+
|
50
|
+
case type
|
51
|
+
|
52
|
+
when :dataset
|
53
|
+
|
54
|
+
keys = ["keyword", "file", "associated", "link", "spatial"]
|
55
|
+
nested_keys = { "person" => ["external", "other"] }
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
keys.each do |key|
|
60
|
+
item.delete(key)
|
61
|
+
end
|
62
|
+
|
63
|
+
nested_keys.each do |key, attribute|
|
64
|
+
item[key].delete(attribute)
|
65
|
+
end
|
66
|
+
|
67
|
+
item
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
def self.update_progress_bar progress_bar, limit, collection_count
|
72
|
+
|
73
|
+
if (progress_bar.progress + limit) < collection_count
|
74
|
+
progress_bar.progress += limit
|
75
|
+
else
|
76
|
+
progress_bar.progress = collection_count
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
def self.write_results_to_file results, file, collection_name
|
82
|
+
|
83
|
+
puts "Writing #{collection_name} to #{file}"
|
84
|
+
|
85
|
+
File.open(file, "w") do |f|
|
86
|
+
f.write(results.to_json)
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
10
91
|
end
|
11
92
|
end
|
data/pure-extractor.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pure-extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stephen Robinson
|
@@ -39,6 +39,20 @@ dependencies:
|
|
39
39
|
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: ruby-progressbar
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
42
56
|
- !ruby/object:Gem::Dependency
|
43
57
|
name: bundler
|
44
58
|
requirement: !ruby/object:Gem::Requirement
|
@@ -87,7 +101,6 @@ email:
|
|
87
101
|
- library.dit@lancaster.ac.uk
|
88
102
|
executables:
|
89
103
|
- pure-extractor
|
90
|
-
- pure-organisation-extractor
|
91
104
|
extensions: []
|
92
105
|
extra_rdoc_files: []
|
93
106
|
files:
|
@@ -98,13 +111,10 @@ files:
|
|
98
111
|
- bin/console
|
99
112
|
- bin/setup
|
100
113
|
- exe/pure-extractor
|
101
|
-
- exe/pure-organisation-extractor
|
102
114
|
- lib/pure/extractor.rb
|
103
115
|
- lib/pure/extractor/commands/pure_command.rb
|
104
116
|
- lib/pure/extractor/commands/pure_extractor.rb
|
105
|
-
- lib/pure/extractor/commands/pure_organisation_extractor.rb
|
106
117
|
- lib/pure/extractor/configure_puree.rb
|
107
|
-
- lib/pure/extractor/organisation.rb
|
108
118
|
- lib/pure/extractor/version.rb
|
109
119
|
- pure-extractor.gemspec
|
110
120
|
homepage: https://github.com/lulibrary
|
data/lib/pure/extractor/commands/pure_organisation_extractor.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'pure/extractor/commands/pure_command'
|
2
|
-
require 'puree'
|
3
|
-
|
4
|
-
module Pure
|
5
|
-
module Extractor
|
6
|
-
module Commands
|
7
|
-
class PureOrganisationExtractorCommand < PureCommand
|
8
|
-
|
9
|
-
include Pure::Extractor::ConfigurePuree
|
10
|
-
|
11
|
-
def execute
|
12
|
-
|
13
|
-
configure_puree server, username, password
|
14
|
-
|
15
|
-
Pure::Extractor::Organisation.extract output_folder
|
16
|
-
|
17
|
-
end
|
18
|
-
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
data/lib/pure/extractor/organisation.rb
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
require 'puree'
|
2
|
-
require 'json'
|
3
|
-
|
4
|
-
module Pure
|
5
|
-
module Extractor
|
6
|
-
class Organisation
|
7
|
-
|
8
|
-
def self.extract output_folder
|
9
|
-
|
10
|
-
filename = "organisation.json"
|
11
|
-
|
12
|
-
org = Puree::Collection.new resource: :organisation
|
13
|
-
|
14
|
-
org_uuids = org.find limit: 1000000000, full: false
|
15
|
-
|
16
|
-
org_uuids.count
|
17
|
-
|
18
|
-
offset = 0
|
19
|
-
limit = 20
|
20
|
-
|
21
|
-
orgs = []
|
22
|
-
|
23
|
-
while offset < org_uuids.count do
|
24
|
-
|
25
|
-
returned_orgs = org.find limit: limit, offset: offset
|
26
|
-
|
27
|
-
returned_orgs.each do |r_org|
|
28
|
-
|
29
|
-
r_org.delete("address")
|
30
|
-
r_org.delete("email")
|
31
|
-
r_org.delete("organisation")
|
32
|
-
r_org.delete("phone")
|
33
|
-
r_org.delete("url")
|
34
|
-
|
35
|
-
end
|
36
|
-
|
37
|
-
orgs.concat(returned_orgs)
|
38
|
-
|
39
|
-
offset += limit
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
File.open(output_folder + "/" + filename, "w") do |f|
|
44
|
-
f.write(orgs.to_json)
|
45
|
-
end
|
46
|
-
|
47
|
-
end
|
48
|
-
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|