pure-extractor 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: af0b81223fae580babc4b2c00650139f4598f1b7
4
- data.tar.gz: 6a359c0e046ce6b9196aeaab5c4372778f3e1c38
3
+ metadata.gz: 40f1c2344052bbe0bb2dd38b51ae78e0edca2138
4
+ data.tar.gz: b5f685c5147b0ae84e6aa7b71b73c1658df6a42b
5
5
  SHA512:
6
- metadata.gz: d23e4252837367cc9ebc244c24d9b3d201c51cbe218bcc77aa24aff4af889c99df5cdf956890c0eac3f3db752aebf604959ac6f0b24de62448b2705104b82388
7
- data.tar.gz: f5353af543b3a3b50e2458da57e44dd262834f815ecbdefbb674de35e4ad3220937b7494314ee7e489c9ea45b97137d1ebbfc9893d031d6f35e99f64d7b2ba36
6
+ metadata.gz: 72cdca316a23fe256800d219e4e5e168f8fcb0f332967c6c23f6a6f5689e0607fe67c2e9f7b9830ea98bc3a8508c3e4e58b065886f0e2baa554fdd014d4d5554
7
+ data.tar.gz: 18c0a8c121a20cbb5fafbe0ee9c5a3ce60449e0b15fa600c19bb9d86a6869897b1741532ad5476d35e98d9cbad3cf6b7b6f36cd1217bfdc2a6b9ff20b73e3b87
@@ -5,10 +5,10 @@ module Pure
5
5
  module Commands
6
6
  class PureCommand < Clamp::Command
7
7
 
8
- option ["-o", "--output-folder"], "folder", "folder to output to", required: true
8
+ option ["-o", "--output-file"], "file", "file to output to, when extracting all this is the folder to place output files", required: true
9
9
  option ["-s", "--server"], "server", "Full url to Pure WS rest server", required: true
10
- option ["-u", "--username"], "username", "Username to connect to Pure WS", required: true
11
- option ["-p", "--password"], "password", "Password to connect to Pure WS", required: true
10
+ option ["-u", "--username"], "username", "Username to connect to Pure WS"
11
+ option ["-p", "--password"], "password", "Password to connect to Pure WS"
12
12
 
13
13
  end
14
14
  end
@@ -8,9 +8,9 @@ module Pure
8
8
 
9
9
  include Pure::Extractor::ConfigurePuree
10
10
 
11
- valid_extracts = [:organisation, :people]
11
+ valid_extracts = [:organisation, :people, :projects, :publications, :datasets, :all]
12
12
 
13
- parameter "EXTRACT", "what to extract from pure" do |s|
13
+ parameter "EXTRACT", "what to extract from pure, valid options are #{valid_extracts.map{|v| v.to_s}}" do |s|
14
14
 
15
15
  s = s.to_sym
16
16
 
@@ -20,15 +20,31 @@ module Pure
20
20
 
21
21
  end
22
22
 
23
+ def pure_collections
24
+ {organisation: :organisation, people: :person, projects: :project, publications: :publication, datasets: :dataset}
25
+ end
26
+
23
27
  def execute
24
28
 
25
29
  configure_puree server, username, password
26
30
 
27
31
  case extract
28
32
 
29
- when :organisation
30
-
31
- Pure::Extractor::Organisation.extract output_folder
33
+ when :all
34
+
35
+ valid_extracts.each do |extract|
36
+
37
+ next unless extract != :all
38
+
39
+ filename = output_file + "/" + extract.to_s + ".json"
40
+
41
+ Pure::Extractor.extract pure_collections[extract], filename
42
+
43
+ end
44
+
45
+ else
46
+
47
+ Pure::Extractor.extract pure_collections[extract], output_file
32
48
 
33
49
  end
34
50
 
@@ -10,7 +10,7 @@ module Pure
10
10
 
11
11
  config.base_url = server
12
12
 
13
- if !username.empty? && !password.empty?
13
+ if !username.nil? && !password.nil? && !username.empty? && !password.empty?
14
14
 
15
15
  config.username = username
16
16
  config.password = password
@@ -1,5 +1,5 @@
1
1
  module Pure
2
2
  module Extractor
3
- VERSION = "0.2.0"
3
+ VERSION = "0.3.0"
4
4
  end
5
5
  end
@@ -1,11 +1,92 @@
1
1
  require "pure/extractor/version"
2
2
  require "pure/extractor/configure_puree"
3
3
  require "pure/extractor/commands/pure_extractor"
4
- require "pure/extractor/commands/pure_organisation_extractor"
5
- require "pure/extractor/organisation"
6
4
 
7
5
  module Pure
8
6
  module Extractor
9
- # Your code goes here...
7
+
8
+ def self.extract type, output_file
9
+
10
+ collection = Puree::Collection.new resource: type
11
+
12
+ collection_count = collection.find(limit: 1000000000, full: false).count
13
+
14
+ puts collection_count
15
+
16
+ progress_bar = ProgressBar.create(format: "%a %e %b\u{15E7}%i %p%% %t", progress_mark: ' ', remainder_mark: "\u{FF65}", total: collection_count)
17
+
18
+ offset = 0
19
+ limit = 20
20
+
21
+ results = []
22
+
23
+ while offset < collection_count do
24
+
25
+ returned_collection = collection.find limit: limit, offset: offset
26
+
27
+ returned_collection.each do |item|
28
+
29
+ delete_keys_for_type type, item
30
+
31
+ end
32
+
33
+ results.concat(returned_collection)
34
+
35
+ update_progress_bar progress_bar, limit, collection_count
36
+
37
+ offset += limit
38
+
39
+ end
40
+
41
+ write_results_to_file results, output_file, type.to_s
42
+
43
+ end
44
+
45
+ def self.delete_keys_for_type type, item
46
+
47
+ keys = []
48
+ nested_keys = {}
49
+
50
+ case type
51
+
52
+ when :dataset
53
+
54
+ keys = ["keyword", "file", "associated", "link", "spatial"]
55
+ nested_keys = { "person" => ["external", "other"] }
56
+
57
+ end
58
+
59
+ keys.each do |key|
60
+ item.delete(key)
61
+ end
62
+
63
+ nested_keys.each do |key, attribute|
64
+ item[key].delete(attribute)
65
+ end
66
+
67
+ item
68
+
69
+ end
70
+
71
+ def self.update_progress_bar progress_bar, limit, collection_count
72
+
73
+ if (progress_bar.progress + limit) < collection_count
74
+ progress_bar.progress += limit
75
+ else
76
+ progress_bar.progress = collection_count
77
+ end
78
+
79
+ end
80
+
81
+ def self.write_results_to_file results, file, collection_name
82
+
83
+ puts "Writing #{collection_name} to #{file}"
84
+
85
+ File.open(file, "w") do |f|
86
+ f.write(results.to_json)
87
+ end
88
+
89
+ end
90
+
10
91
  end
11
92
  end
@@ -28,6 +28,7 @@ Gem::Specification.new do |spec|
28
28
 
29
29
  spec.add_dependency "clamp"
30
30
  spec.add_dependency "puree"
31
+ spec.add_dependency "ruby-progressbar"
31
32
 
32
33
  spec.add_dependency "bundler", "~> 1.12"
33
34
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pure-extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stephen Robinson
@@ -39,6 +39,20 @@ dependencies:
39
39
  - - ">="
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: ruby-progressbar
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
42
56
  - !ruby/object:Gem::Dependency
43
57
  name: bundler
44
58
  requirement: !ruby/object:Gem::Requirement
@@ -87,7 +101,6 @@ email:
87
101
  - library.dit@lancaster.ac.uk
88
102
  executables:
89
103
  - pure-extractor
90
- - pure-organisation-extractor
91
104
  extensions: []
92
105
  extra_rdoc_files: []
93
106
  files:
@@ -98,13 +111,10 @@ files:
98
111
  - bin/console
99
112
  - bin/setup
100
113
  - exe/pure-extractor
101
- - exe/pure-organisation-extractor
102
114
  - lib/pure/extractor.rb
103
115
  - lib/pure/extractor/commands/pure_command.rb
104
116
  - lib/pure/extractor/commands/pure_extractor.rb
105
- - lib/pure/extractor/commands/pure_organisation_extractor.rb
106
117
  - lib/pure/extractor/configure_puree.rb
107
- - lib/pure/extractor/organisation.rb
108
118
  - lib/pure/extractor/version.rb
109
119
  - pure-extractor.gemspec
110
120
  homepage: https://github.com/lulibrary
@@ -1,6 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require "bundler/setup"
4
- require "pure/extractor"
5
-
6
- Pure::Extractor::Commands::PureOrganisationExtractorCommand.run
@@ -1,22 +0,0 @@
1
- require 'pure/extractor/commands/pure_command'
2
- require 'puree'
3
-
4
- module Pure
5
- module Extractor
6
- module Commands
7
- class PureOrganisationExtractorCommand < PureCommand
8
-
9
- include Pure::Extractor::ConfigurePuree
10
-
11
- def execute
12
-
13
- configure_puree server, username, password
14
-
15
- Pure::Extractor::Organisation.extract output_folder
16
-
17
- end
18
-
19
- end
20
- end
21
- end
22
- end
@@ -1,51 +0,0 @@
1
- require 'puree'
2
- require 'json'
3
-
4
- module Pure
5
- module Extractor
6
- class Organisation
7
-
8
- def self.extract output_folder
9
-
10
- filename = "organisation.json"
11
-
12
- org = Puree::Collection.new resource: :organisation
13
-
14
- org_uuids = org.find limit: 1000000000, full: false
15
-
16
- org_uuids.count
17
-
18
- offset = 0
19
- limit = 20
20
-
21
- orgs = []
22
-
23
- while offset < org_uuids.count do
24
-
25
- returned_orgs = org.find limit: limit, offset: offset
26
-
27
- returned_orgs.each do |r_org|
28
-
29
- r_org.delete("address")
30
- r_org.delete("email")
31
- r_org.delete("organisation")
32
- r_org.delete("phone")
33
- r_org.delete("url")
34
-
35
- end
36
-
37
- orgs.concat(returned_orgs)
38
-
39
- offset += limit
40
-
41
- end
42
-
43
- File.open(output_folder + "/" + filename, "w") do |f|
44
- f.write(orgs.to_json)
45
- end
46
-
47
- end
48
-
49
- end
50
- end
51
- end