pure-extractor 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: af0b81223fae580babc4b2c00650139f4598f1b7
4
- data.tar.gz: 6a359c0e046ce6b9196aeaab5c4372778f3e1c38
3
+ metadata.gz: 40f1c2344052bbe0bb2dd38b51ae78e0edca2138
4
+ data.tar.gz: b5f685c5147b0ae84e6aa7b71b73c1658df6a42b
5
5
  SHA512:
6
- metadata.gz: d23e4252837367cc9ebc244c24d9b3d201c51cbe218bcc77aa24aff4af889c99df5cdf956890c0eac3f3db752aebf604959ac6f0b24de62448b2705104b82388
7
- data.tar.gz: f5353af543b3a3b50e2458da57e44dd262834f815ecbdefbb674de35e4ad3220937b7494314ee7e489c9ea45b97137d1ebbfc9893d031d6f35e99f64d7b2ba36
6
+ metadata.gz: 72cdca316a23fe256800d219e4e5e168f8fcb0f332967c6c23f6a6f5689e0607fe67c2e9f7b9830ea98bc3a8508c3e4e58b065886f0e2baa554fdd014d4d5554
7
+ data.tar.gz: 18c0a8c121a20cbb5fafbe0ee9c5a3ce60449e0b15fa600c19bb9d86a6869897b1741532ad5476d35e98d9cbad3cf6b7b6f36cd1217bfdc2a6b9ff20b73e3b87
@@ -5,10 +5,10 @@ module Pure
5
5
  module Commands
6
6
  class PureCommand < Clamp::Command
7
7
 
8
- option ["-o", "--output-folder"], "folder", "folder to output to", required: true
8
+ option ["-o", "--output-file"], "file", "file to output to, when extracting all this is the folder to place output files", required: true
9
9
  option ["-s", "--server"], "server", "Full url to Pure WS rest server", required: true
10
- option ["-u", "--username"], "username", "Username to connect to Pure WS", required: true
11
- option ["-p", "--password"], "password", "Password to connect to Pure WS", required: true
10
+ option ["-u", "--username"], "username", "Username to connect to Pure WS"
11
+ option ["-p", "--password"], "password", "Password to connect to Pure WS"
12
12
 
13
13
  end
14
14
  end
@@ -8,9 +8,9 @@ module Pure
8
8
 
9
9
  include Pure::Extractor::ConfigurePuree
10
10
 
11
- valid_extracts = [:organisation, :people]
11
+ valid_extracts = [:organisation, :people, :projects, :publications, :datasets, :all]
12
12
 
13
- parameter "EXTRACT", "what to extract from pure" do |s|
13
+ parameter "EXTRACT", "what to extract from pure, valid options are #{valid_extracts.map{|v| v.to_s}}" do |s|
14
14
 
15
15
  s = s.to_sym
16
16
 
@@ -20,15 +20,31 @@ module Pure
20
20
 
21
21
  end
22
22
 
23
+ def pure_collections
24
+ {organisation: :organisation, people: :person, projects: :project, publications: :publication, datasets: :dataset}
25
+ end
26
+
23
27
  def execute
24
28
 
25
29
  configure_puree server, username, password
26
30
 
27
31
  case extract
28
32
 
29
- when :organisation
30
-
31
- Pure::Extractor::Organisation.extract output_folder
33
+ when :all
34
+
35
+ valid_extracts.each do |extract|
36
+
37
+ next unless extract != :all
38
+
39
+ filename = output_file + "/" + extract.to_s + ".json"
40
+
41
+ Pure::Extractor.extract pure_collections[extract], filename
42
+
43
+ end
44
+
45
+ else
46
+
47
+ Pure::Extractor.extract pure_collections[extract], output_file
32
48
 
33
49
  end
34
50
 
@@ -10,7 +10,7 @@ module Pure
10
10
 
11
11
  config.base_url = server
12
12
 
13
- if !username.empty? && !password.empty?
13
+ if !username.nil? && !password.nil? && !username.empty? && !password.empty?
14
14
 
15
15
  config.username = username
16
16
  config.password = password
@@ -1,5 +1,5 @@
1
1
  module Pure
2
2
  module Extractor
3
- VERSION = "0.2.0"
3
+ VERSION = "0.3.0"
4
4
  end
5
5
  end
@@ -1,11 +1,92 @@
1
1
  require "pure/extractor/version"
2
2
  require "pure/extractor/configure_puree"
3
3
  require "pure/extractor/commands/pure_extractor"
4
- require "pure/extractor/commands/pure_organisation_extractor"
5
- require "pure/extractor/organisation"
6
4
 
7
5
  module Pure
8
6
  module Extractor
9
- # Your code goes here...
7
+
8
+ def self.extract type, output_file
9
+
10
+ collection = Puree::Collection.new resource: type
11
+
12
+ collection_count = collection.find(limit: 1000000000, full: false).count
13
+
14
+ puts collection_count
15
+
16
+ progress_bar = ProgressBar.create(format: "%a %e %b\u{15E7}%i %p%% %t", progress_mark: ' ', remainder_mark: "\u{FF65}", total: collection_count)
17
+
18
+ offset = 0
19
+ limit = 20
20
+
21
+ results = []
22
+
23
+ while offset < collection_count do
24
+
25
+ returned_collection = collection.find limit: limit, offset: offset
26
+
27
+ returned_collection.each do |item|
28
+
29
+ delete_keys_for_type type, item
30
+
31
+ end
32
+
33
+ results.concat(returned_collection)
34
+
35
+ update_progress_bar progress_bar, limit, collection_count
36
+
37
+ offset += limit
38
+
39
+ end
40
+
41
+ write_results_to_file results, output_file, type.to_s
42
+
43
+ end
44
+
45
+ def self.delete_keys_for_type type, item
46
+
47
+ keys = []
48
+ nested_keys = {}
49
+
50
+ case type
51
+
52
+ when :dataset
53
+
54
+ keys = ["keyword", "file", "associated", "link", "spatial"]
55
+ nested_keys = { "person" => ["external", "other"] }
56
+
57
+ end
58
+
59
+ keys.each do |key|
60
+ item.delete(key)
61
+ end
62
+
63
+ nested_keys.each do |key, attribute|
64
+ item[key].delete(attribute)
65
+ end
66
+
67
+ item
68
+
69
+ end
70
+
71
+ def self.update_progress_bar progress_bar, limit, collection_count
72
+
73
+ if (progress_bar.progress + limit) < collection_count
74
+ progress_bar.progress += limit
75
+ else
76
+ progress_bar.progress = collection_count
77
+ end
78
+
79
+ end
80
+
81
+ def self.write_results_to_file results, file, collection_name
82
+
83
+ puts "Writing #{collection_name} to #{file}"
84
+
85
+ File.open(file, "w") do |f|
86
+ f.write(results.to_json)
87
+ end
88
+
89
+ end
90
+
10
91
  end
11
92
  end
@@ -28,6 +28,7 @@ Gem::Specification.new do |spec|
28
28
 
29
29
  spec.add_dependency "clamp"
30
30
  spec.add_dependency "puree"
31
+ spec.add_dependency "ruby-progressbar"
31
32
 
32
33
  spec.add_dependency "bundler", "~> 1.12"
33
34
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pure-extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stephen Robinson
@@ -39,6 +39,20 @@ dependencies:
39
39
  - - ">="
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: ruby-progressbar
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
42
56
  - !ruby/object:Gem::Dependency
43
57
  name: bundler
44
58
  requirement: !ruby/object:Gem::Requirement
@@ -87,7 +101,6 @@ email:
87
101
  - library.dit@lancaster.ac.uk
88
102
  executables:
89
103
  - pure-extractor
90
- - pure-organisation-extractor
91
104
  extensions: []
92
105
  extra_rdoc_files: []
93
106
  files:
@@ -98,13 +111,10 @@ files:
98
111
  - bin/console
99
112
  - bin/setup
100
113
  - exe/pure-extractor
101
- - exe/pure-organisation-extractor
102
114
  - lib/pure/extractor.rb
103
115
  - lib/pure/extractor/commands/pure_command.rb
104
116
  - lib/pure/extractor/commands/pure_extractor.rb
105
- - lib/pure/extractor/commands/pure_organisation_extractor.rb
106
117
  - lib/pure/extractor/configure_puree.rb
107
- - lib/pure/extractor/organisation.rb
108
118
  - lib/pure/extractor/version.rb
109
119
  - pure-extractor.gemspec
110
120
  homepage: https://github.com/lulibrary
@@ -1,6 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require "bundler/setup"
4
- require "pure/extractor"
5
-
6
- Pure::Extractor::Commands::PureOrganisationExtractorCommand.run
@@ -1,22 +0,0 @@
1
- require 'pure/extractor/commands/pure_command'
2
- require 'puree'
3
-
4
- module Pure
5
- module Extractor
6
- module Commands
7
- class PureOrganisationExtractorCommand < PureCommand
8
-
9
- include Pure::Extractor::ConfigurePuree
10
-
11
- def execute
12
-
13
- configure_puree server, username, password
14
-
15
- Pure::Extractor::Organisation.extract output_folder
16
-
17
- end
18
-
19
- end
20
- end
21
- end
22
- end
@@ -1,51 +0,0 @@
1
- require 'puree'
2
- require 'json'
3
-
4
- module Pure
5
- module Extractor
6
- class Organisation
7
-
8
- def self.extract output_folder
9
-
10
- filename = "organisation.json"
11
-
12
- org = Puree::Collection.new resource: :organisation
13
-
14
- org_uuids = org.find limit: 1000000000, full: false
15
-
16
- org_uuids.count
17
-
18
- offset = 0
19
- limit = 20
20
-
21
- orgs = []
22
-
23
- while offset < org_uuids.count do
24
-
25
- returned_orgs = org.find limit: limit, offset: offset
26
-
27
- returned_orgs.each do |r_org|
28
-
29
- r_org.delete("address")
30
- r_org.delete("email")
31
- r_org.delete("organisation")
32
- r_org.delete("phone")
33
- r_org.delete("url")
34
-
35
- end
36
-
37
- orgs.concat(returned_orgs)
38
-
39
- offset += limit
40
-
41
- end
42
-
43
- File.open(output_folder + "/" + filename, "w") do |f|
44
- f.write(orgs.to_json)
45
- end
46
-
47
- end
48
-
49
- end
50
- end
51
- end