pure-extractor 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/pure/extractor/commands/pure_command.rb +3 -3
- data/lib/pure/extractor/commands/pure_extractor.rb +21 -5
- data/lib/pure/extractor/configure_puree.rb +1 -1
- data/lib/pure/extractor/version.rb +1 -1
- data/lib/pure/extractor.rb +84 -3
- data/pure-extractor.gemspec +1 -0
- metadata +15 -5
- data/exe/pure-organisation-extractor +0 -6
- data/lib/pure/extractor/commands/pure_organisation_extractor.rb +0 -22
- data/lib/pure/extractor/organisation.rb +0 -51
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 40f1c2344052bbe0bb2dd38b51ae78e0edca2138
|
4
|
+
data.tar.gz: b5f685c5147b0ae84e6aa7b71b73c1658df6a42b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72cdca316a23fe256800d219e4e5e168f8fcb0f332967c6c23f6a6f5689e0607fe67c2e9f7b9830ea98bc3a8508c3e4e58b065886f0e2baa554fdd014d4d5554
|
7
|
+
data.tar.gz: 18c0a8c121a20cbb5fafbe0ee9c5a3ce60449e0b15fa600c19bb9d86a6869897b1741532ad5476d35e98d9cbad3cf6b7b6f36cd1217bfdc2a6b9ff20b73e3b87
|
@@ -5,10 +5,10 @@ module Pure
|
|
5
5
|
module Commands
|
6
6
|
class PureCommand < Clamp::Command
|
7
7
|
|
8
|
-
option ["-o", "--output-
|
8
|
+
option ["-o", "--output-file"], "file", "file to output to, when extracting all this is the folder to place output files", required: true
|
9
9
|
option ["-s", "--server"], "server", "Full url to Pure WS rest server", required: true
|
10
|
-
option ["-u", "--username"], "username", "Username to connect to Pure WS"
|
11
|
-
option ["-p", "--password"], "password", "Password to connect to Pure WS"
|
10
|
+
option ["-u", "--username"], "username", "Username to connect to Pure WS"
|
11
|
+
option ["-p", "--password"], "password", "Password to connect to Pure WS"
|
12
12
|
|
13
13
|
end
|
14
14
|
end
|
@@ -8,9 +8,9 @@ module Pure
|
|
8
8
|
|
9
9
|
include Pure::Extractor::ConfigurePuree
|
10
10
|
|
11
|
-
valid_extracts = [:organisation, :people]
|
11
|
+
valid_extracts = [:organisation, :people, :projects, :publications, :datasets, :all]
|
12
12
|
|
13
|
-
parameter "EXTRACT", "what to extract from pure" do |s|
|
13
|
+
parameter "EXTRACT", "what to extract from pure, valid options are #{valid_extracts.map{|v| v.to_s}}" do |s|
|
14
14
|
|
15
15
|
s = s.to_sym
|
16
16
|
|
@@ -20,15 +20,31 @@ module Pure
|
|
20
20
|
|
21
21
|
end
|
22
22
|
|
23
|
+
def pure_collections
|
24
|
+
{organisation: :organisation, people: :person, projects: :project, publications: :publication, datasets: :dataset}
|
25
|
+
end
|
26
|
+
|
23
27
|
def execute
|
24
28
|
|
25
29
|
configure_puree server, username, password
|
26
30
|
|
27
31
|
case extract
|
28
32
|
|
29
|
-
when :
|
30
|
-
|
31
|
-
|
33
|
+
when :all
|
34
|
+
|
35
|
+
valid_extracts.each do |extract|
|
36
|
+
|
37
|
+
next unless extract != :all
|
38
|
+
|
39
|
+
filename = output_file + "/" + extract.to_s + ".json"
|
40
|
+
|
41
|
+
Pure::Extractor.extract pure_collections[extract], filename
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
else
|
46
|
+
|
47
|
+
Pure::Extractor.extract pure_collections[extract], output_file
|
32
48
|
|
33
49
|
end
|
34
50
|
|
data/lib/pure/extractor.rb
CHANGED
@@ -1,11 +1,92 @@
|
|
1
1
|
require "pure/extractor/version"
|
2
2
|
require "pure/extractor/configure_puree"
|
3
3
|
require "pure/extractor/commands/pure_extractor"
|
4
|
-
require "pure/extractor/commands/pure_organisation_extractor"
|
5
|
-
require "pure/extractor/organisation"
|
6
4
|
|
7
5
|
module Pure
|
8
6
|
module Extractor
|
9
|
-
|
7
|
+
|
8
|
+
def self.extract type, output_file
|
9
|
+
|
10
|
+
collection = Puree::Collection.new resource: type
|
11
|
+
|
12
|
+
collection_count = collection.find(limit: 1000000000, full: false).count
|
13
|
+
|
14
|
+
puts collection_count
|
15
|
+
|
16
|
+
progress_bar = ProgressBar.create(format: "%a %e %b\u{15E7}%i %p%% %t", progress_mark: ' ', remainder_mark: "\u{FF65}", total: collection_count)
|
17
|
+
|
18
|
+
offset = 0
|
19
|
+
limit = 20
|
20
|
+
|
21
|
+
results = []
|
22
|
+
|
23
|
+
while offset < collection_count do
|
24
|
+
|
25
|
+
returned_collection = collection.find limit: limit, offset: offset
|
26
|
+
|
27
|
+
returned_collection.each do |item|
|
28
|
+
|
29
|
+
delete_keys_for_type type, item
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
results.concat(returned_collection)
|
34
|
+
|
35
|
+
update_progress_bar progress_bar, limit, collection_count
|
36
|
+
|
37
|
+
offset += limit
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
write_results_to_file results, output_file, type.to_s
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.delete_keys_for_type type, item
|
46
|
+
|
47
|
+
keys = []
|
48
|
+
nested_keys = {}
|
49
|
+
|
50
|
+
case type
|
51
|
+
|
52
|
+
when :dataset
|
53
|
+
|
54
|
+
keys = ["keyword", "file", "associated", "link", "spatial"]
|
55
|
+
nested_keys = { "person" => ["external", "other"] }
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
keys.each do |key|
|
60
|
+
item.delete(key)
|
61
|
+
end
|
62
|
+
|
63
|
+
nested_keys.each do |key, attribute|
|
64
|
+
item[key].delete(attribute)
|
65
|
+
end
|
66
|
+
|
67
|
+
item
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
def self.update_progress_bar progress_bar, limit, collection_count
|
72
|
+
|
73
|
+
if (progress_bar.progress + limit) < collection_count
|
74
|
+
progress_bar.progress += limit
|
75
|
+
else
|
76
|
+
progress_bar.progress = collection_count
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
def self.write_results_to_file results, file, collection_name
|
82
|
+
|
83
|
+
puts "Writing #{collection_name} to #{file}"
|
84
|
+
|
85
|
+
File.open(file, "w") do |f|
|
86
|
+
f.write(results.to_json)
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
10
91
|
end
|
11
92
|
end
|
data/pure-extractor.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pure-extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stephen Robinson
|
@@ -39,6 +39,20 @@ dependencies:
|
|
39
39
|
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: ruby-progressbar
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
42
56
|
- !ruby/object:Gem::Dependency
|
43
57
|
name: bundler
|
44
58
|
requirement: !ruby/object:Gem::Requirement
|
@@ -87,7 +101,6 @@ email:
|
|
87
101
|
- library.dit@lancaster.ac.uk
|
88
102
|
executables:
|
89
103
|
- pure-extractor
|
90
|
-
- pure-organisation-extractor
|
91
104
|
extensions: []
|
92
105
|
extra_rdoc_files: []
|
93
106
|
files:
|
@@ -98,13 +111,10 @@ files:
|
|
98
111
|
- bin/console
|
99
112
|
- bin/setup
|
100
113
|
- exe/pure-extractor
|
101
|
-
- exe/pure-organisation-extractor
|
102
114
|
- lib/pure/extractor.rb
|
103
115
|
- lib/pure/extractor/commands/pure_command.rb
|
104
116
|
- lib/pure/extractor/commands/pure_extractor.rb
|
105
|
-
- lib/pure/extractor/commands/pure_organisation_extractor.rb
|
106
117
|
- lib/pure/extractor/configure_puree.rb
|
107
|
-
- lib/pure/extractor/organisation.rb
|
108
118
|
- lib/pure/extractor/version.rb
|
109
119
|
- pure-extractor.gemspec
|
110
120
|
homepage: https://github.com/lulibrary
|
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'pure/extractor/commands/pure_command'
|
2
|
-
require 'puree'
|
3
|
-
|
4
|
-
module Pure
|
5
|
-
module Extractor
|
6
|
-
module Commands
|
7
|
-
class PureOrganisationExtractorCommand < PureCommand
|
8
|
-
|
9
|
-
include Pure::Extractor::ConfigurePuree
|
10
|
-
|
11
|
-
def execute
|
12
|
-
|
13
|
-
configure_puree server, username, password
|
14
|
-
|
15
|
-
Pure::Extractor::Organisation.extract output_folder
|
16
|
-
|
17
|
-
end
|
18
|
-
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
@@ -1,51 +0,0 @@
|
|
1
|
-
require 'puree'
|
2
|
-
require 'json'
|
3
|
-
|
4
|
-
module Pure
|
5
|
-
module Extractor
|
6
|
-
class Organisation
|
7
|
-
|
8
|
-
def self.extract output_folder
|
9
|
-
|
10
|
-
filename = "organisation.json"
|
11
|
-
|
12
|
-
org = Puree::Collection.new resource: :organisation
|
13
|
-
|
14
|
-
org_uuids = org.find limit: 1000000000, full: false
|
15
|
-
|
16
|
-
org_uuids.count
|
17
|
-
|
18
|
-
offset = 0
|
19
|
-
limit = 20
|
20
|
-
|
21
|
-
orgs = []
|
22
|
-
|
23
|
-
while offset < org_uuids.count do
|
24
|
-
|
25
|
-
returned_orgs = org.find limit: limit, offset: offset
|
26
|
-
|
27
|
-
returned_orgs.each do |r_org|
|
28
|
-
|
29
|
-
r_org.delete("address")
|
30
|
-
r_org.delete("email")
|
31
|
-
r_org.delete("organisation")
|
32
|
-
r_org.delete("phone")
|
33
|
-
r_org.delete("url")
|
34
|
-
|
35
|
-
end
|
36
|
-
|
37
|
-
orgs.concat(returned_orgs)
|
38
|
-
|
39
|
-
offset += limit
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
File.open(output_folder + "/" + filename, "w") do |f|
|
44
|
-
f.write(orgs.to_json)
|
45
|
-
end
|
46
|
-
|
47
|
-
end
|
48
|
-
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|