discovery-indexer 0.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 073a6699afc6ef96ee080e00be2dfe3180bfb734
4
- data.tar.gz: e4361f9aad38081a92598e53fb0f55e564807812
3
+ metadata.gz: f852ef4d15e2ba4dc67d49b1a0ec599ce5791234
4
+ data.tar.gz: 781d104e85d4b7ce490b639ca29b5aa31e03ef7e
5
5
  SHA512:
6
- metadata.gz: 29e0c0dd830b66cc7cfe61bd4da7a42d3ee6fbb55721bf01ecf1f4b1679585d7aae9cc7c6f2a35d77b80b0017b45babea77516b9a4373191b7c8899b0a9a371c
7
- data.tar.gz: 09c61dd4fa8501865bc4081dcb584224de912921eae74bb34121d6aafd05f4a762f45cdaffdf2c0a0282d318bc501629dcb0f1228c48d1fcba9ca769d4ab07db
6
+ metadata.gz: 12bc9321557bc08c608763546a185404d47fe4d8225df32604378f864acde9ceb93f8df48c63e755ffe6fb236b7e5a60ea15453ce2e07ba18f213e37f8249d8a
7
+ data.tar.gz: 91402727cff066c16adf1f29a8b9a156dc2c175de887760b2e343fe49caed6ab97f6c8bfd190b034bf8add13095c60553f71eda75b5890b2c96bddfc82682c7a
@@ -1,3 +1,6 @@
1
+ require 'errors'
2
+ require 'logging'
3
+
1
4
  require 'reader/purlxml'
2
5
  require 'reader/purlxml_reader'
3
6
  require 'reader/purlxml_parser'
@@ -15,7 +18,6 @@ require 'writer/solr_writer'
15
18
 
16
19
  #require 'utilities/extract_sub_targets'
17
20
 
18
- require 'errors'
19
21
 
20
22
  module DiscoveryIndexer
21
23
  PURL_DEFAULT = 'http://purl-test.stanford.edu'
data/lib/logging.rb ADDED
@@ -0,0 +1,16 @@
1
+ require 'logger'
2
+
3
+ module DiscoveryIndexer
4
+ module Logging
5
+ class << self
6
+ def logger
7
+ @logger ||= Logger.new(STDOUT)
8
+ end
9
+
10
+ def logger=(logger)
11
+ @logger = logger
12
+ end
13
+ end
14
+
15
+ end
16
+ end
@@ -5,9 +5,9 @@ module DiscoveryIndexer
5
5
  # This class is the main class to access and parse the mods xml
6
6
  # as retrieved from PURL server
7
7
  # @example to run the code
8
- # druid = "aa111aa1111"
9
- # p = DiscoveryIndexer::InputXml::Modsxml.new(druid)
10
- # model = p.load()
8
+ # druid = "aa111aa1111"
9
+ # p = DiscoveryIndexer::InputXml::Modsxml.new(druid)
10
+ # model = p.load()
11
11
  #
12
12
  #
13
13
  class Modsxml
@@ -4,9 +4,9 @@ module DiscoveryIndexer
4
4
  # This class is the main class to access and parse the purl xml
5
5
  # as retrieved from PURL server
6
6
  # @example to run the code
7
- # druid = "aa111aa1111"
8
- # p = DiscoveryIndexer::InputXml::Purlxml.new(druid)
9
- # model = p.load()
7
+ # druid = "aa111aa1111"
8
+ # p = DiscoveryIndexer::InputXml::Purlxml.new(druid)
9
+ # model = p.load()
10
10
  #
11
11
  class Purlxml
12
12
 
@@ -1,21 +1,75 @@
1
1
  module DiscoveryIndexer
2
2
  module InputXml
3
3
  class PurlxmlModel
4
+
5
+ #@!attribute [rw] public_xml
6
+ # @return [Nokogiri::XML] The public xml as retrieved from purl server
4
7
  attr_accessor :public_xml
8
+
9
+ #@!attribute [rw] content_metadata
10
+ # @return [Nokogiri::XML] The content_metadata as extracted from public xml
5
11
  attr_accessor :content_metadata
12
+
13
+ #@!attribute [rw] identity_metadata
14
+ # @return [Nokogiri::XML] The identity_metadata as extracted from public xml
6
15
  attr_accessor :identity_metadata
16
+
17
+ #@!attribute [rw] rights_metadata
18
+ # @return [Nokogiri::XML] The rights_metadata as extracted from public xml
7
19
  attr_accessor :rights_metadata
20
+
21
+ #@!attribute [rw] dc
22
+ # @return [Nokogiri::XML] The dc element as extracted from public xml
8
23
  attr_accessor :dc
24
+
25
+ #@!attribute [rw] rdf
26
+ # @return [Nokogiri::XML] The rdf element as extracted from public xml
9
27
  attr_accessor :rdf
28
+
29
+ # @!attribute [rw] release_tags_hash
30
+ # @return [Hash] The release_tag in hash format as extracted from public xml
31
+ # identity_metadata.
32
+ # @example
33
+ # !{"target1"=>true, "target2"=>false}
10
34
  attr_accessor :release_tags_hash
35
+
36
+ # @!attribute [rw] dor_content_type
37
+ # @return [String] The dor_content_type as extracted from public xml
38
+ # content_metadata.
11
39
  attr_accessor :dor_content_type
40
+
41
+ # @!attribute [rw] is_collection
42
+ # @return [Boolean] true if the item type is collection in the identity_metadata
12
43
  attr_accessor :is_collection
44
+
45
+ # @!attribute [rw] collection_druids
46
+ # @return [Array] a list of the collections that this druid belongs to
47
+ # @example
48
+ # ["aa111aa1111","bb111bb1111"]
13
49
  attr_accessor :collection_druids
14
- attr_accessor :dor_content_type
50
+
51
+ # @!attribute [rw] file_ids
52
+ # @return [Array] a list of the file ids in the content_metadata
53
+ # @example
54
+ # ["pc0065_b08_f10_i031.txt","pc0065_b08_f10_i032.txt"]
15
55
  attr_accessor :file_ids
56
+
57
+ # @!attribute [rw] image_ids
58
+ # @return [Array] a list of the image ids in the content_metadata
59
+ # @example
60
+ # ["pc0065_b08_f10_i031.jp2","pc0065_b08_f10_i032.jp2"]
16
61
  attr_accessor :image_ids
62
+
63
+ # @!attribute [rw] catkey
64
+ # @return [String] the catkey attribute in identity_metadata
17
65
  attr_accessor :catkey
66
+
67
+ # @!attribute [rw] barcode
68
+ # @return [String] the barcode attribute in identity_metadata
18
69
  attr_accessor :barcode
70
+
71
+ # @!attribute [rw] label
72
+ # @return [String] the objectLabel attribute in identity_metadata
19
73
  attr_accessor :label
20
74
 
21
75
  end
@@ -34,7 +34,7 @@ module DiscoveryIndexer
34
34
 
35
35
  # extracts the identityMetadata for this fedora object, from the purl xml
36
36
  # @return [Nokogiri::XML::Document] the identityMetadata for the fedora object
37
- # @raise [DiscoveryIndexer::Errors::MissingIdentityMetadata] if there is no contentMetadata
37
+ # @raise [DiscoveryIndexer::Errors::MissingIdentityMetadata] if there is no identity_metadata
38
38
  def parse_identity_metadata
39
39
  begin
40
40
  ng_doc = Nokogiri::XML(@purlxml_ng_doc.root.xpath('/publicObject/identityMetadata').to_xml)
@@ -113,13 +113,9 @@ module DiscoveryIndexer
113
113
  # @return [Nokogiri::XML::Document] the contentMetadata for the fedora object
114
114
  # @raise [DiscoveryIndexer::Errors::MissingContentMetadata] if there is no contentMetadata
115
115
  def parse_content_metadata
116
- # begin
117
116
  ng_doc = Nokogiri::XML(@purlxml_ng_doc.root.xpath('/publicObject/contentMetadata').to_xml)
118
- # raise DiscoveryIndexer::Errors::MissingContentMetadata.new(@purlxml_ng_doc.inspect) if !ng_doc || ng_doc.children.empty?
117
+ ng_doc = nil if !ng_doc || ng_doc.children.empty?
119
118
  ng_doc
120
- # rescue
121
- # raise DiscoveryIndexer::Errors::MissingContentMetadata.new(@purlxml_ng_doc.inspect)
122
- # end
123
119
  end
124
120
 
125
121
  # @return true if the identityMetadata has <objectType>collection</objectType>, false otherwise
@@ -153,18 +149,18 @@ module DiscoveryIndexer
153
149
  # @return [String]
154
150
  def parse_dor_content_type
155
151
  content_md = parse_content_metadata
156
- dct = content_md ? content_md.xpath('@type').text : nil
152
+ dct = content_md ? content_md.xpath('contentMetadata/@type').text : nil
157
153
  puts " has no DOR content type (<contentMetadata> element may be missing type attribute)" if !dct || dct.empty?
158
154
  dct
159
155
  end
160
156
 
161
- # the @id attribute of resource/file elements that match the display_type, including extension
157
+ # the @id attribute of resource/file elements that match the image type, including extension
162
158
  # @return [Array<String>] filenames
163
159
  def parse_image_ids
164
160
  ids = []
165
161
  content_md = parse_content_metadata
166
162
  unless content_md.nil?
167
- content_md.xpath('./resource[@type="image"]/file/@id').each { |node|
163
+ content_md.xpath('//resource[@type="image"]/file/@id').each { |node|
168
164
  ids << node.text if !node.text.empty?
169
165
  }
170
166
  return nil if ids.empty?
@@ -172,11 +168,13 @@ module DiscoveryIndexer
172
168
  end
173
169
  end
174
170
 
171
+ # the @id attribute of resource/file elements, including extension
172
+ # @return [Array<String>] filenames
175
173
  def parse_file_ids
176
174
  ids = []
177
175
  content_md = parse_content_metadata
178
176
  unless content_md.nil?
179
- content_md.xpath('./resource/file/@id').each { |node|
177
+ content_md.xpath('//resource/file/@id').each { |node|
180
178
  ids << node.text if !node.text.empty?
181
179
  }
182
180
  return nil if ids.empty?
@@ -184,6 +182,7 @@ module DiscoveryIndexer
184
182
  end
185
183
  end
186
184
 
185
+ # @return catkey value from the DOR identity_metadata, or nil if there is no catkey
187
186
  def parse_catkey
188
187
  catkey = nil
189
188
  node = @purlxml_ng_doc.xpath("/publicObject/identityMetadata/otherId[@name='catkey']")
@@ -191,6 +190,7 @@ module DiscoveryIndexer
191
190
  return catkey
192
191
  end
193
192
 
193
+ # @return barcode value from the DOR identity_metadata, or nil if there is no barcode
194
194
  def parse_barcode
195
195
  barcode = nil
196
196
  node = @purlxml_ng_doc.xpath("/publicObject/identityMetadata/otherId[@name='barcode']")
@@ -198,6 +198,7 @@ module DiscoveryIndexer
198
198
  return barcode
199
199
  end
200
200
 
201
+ # @return objectLabel value from the DOR identity_metadata, or nil if there is no objectLabel
201
202
  def parse_label
202
203
  label = nil
203
204
  node = @purlxml_ng_doc.xpath("/publicObject/identityMetadata/objectLabel")
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module DiscoveryIndexer
2
- VERSION = '0.1'
2
+ VERSION = '0.2.1'
3
3
  end
@@ -4,7 +4,8 @@ require 'rsolr'
4
4
  module DiscoveryIndexer
5
5
  module Writer
6
6
  class SolrClient
7
-
7
+ include DiscoveryIndexer::Logging
8
+
8
9
  # Add the document to solr, retry if an error occurs.
9
10
  # See https://github.com/ooyala/retries for docs on with_retries.
10
11
  # @param [Hash] solr_doc a Hash representation of the solr document
@@ -24,22 +25,21 @@ module DiscoveryIndexer
24
25
  end
25
26
 
26
27
  def self.process(solr_doc, solr_connector, max_retries, is_delete=false)
27
- logger = Logger.new STDOUT
28
- id = solr_doc[:id]
28
+ id = solr_doc[:id]
29
29
  puts id
30
30
  handler = Proc.new do |exception, attempt_number, total_delay|
31
- logger.debug "#{exception.class} on attempt #{attempt_number} for #{id}"
31
+ DiscoveryIndexer::Logging.logger.debug "#{exception.class} on attempt #{attempt_number} for #{id}"
32
32
  end
33
33
 
34
34
  with_retries(:max_tries => max_retries, :handler => handler, :base_sleep_seconds => 1, :max_sleep_seconds => 5) do |attempt|
35
- logger.debug "Attempt #{attempt} for #{id}"
35
+ DiscoveryIndexer::Logging.logger.debug "Attempt #{attempt} for #{id}"
36
36
 
37
37
  if is_delete
38
38
  solr_connector.delete_by_id(id)
39
- logger.info "Successfully deleted #{id} on attempt #{attempt}"
39
+ DiscoveryIndexer::Logging.logger.info "Successfully deleted #{id} on attempt #{attempt}"
40
40
  else
41
41
  solr_connector.add(solr_doc)
42
- logger.info "Successfully indexed #{id} on attempt #{attempt}"
42
+ DiscoveryIndexer::Logging.logger.info "Successfully indexed #{id} on attempt #{attempt}"
43
43
  end
44
44
 
45
45
  end
@@ -4,6 +4,7 @@ require 'rsolr'
4
4
  module DiscoveryIndexer
5
5
  module Writer
6
6
  class SolrWriter
7
+ include DiscoveryIndexer::Logging
7
8
 
8
9
  def process(druid, index_doc, targets, solr_targets_configs)
9
10
  @solr_targets_configs = solr_targets_configs
@@ -47,8 +48,6 @@ module DiscoveryIndexer
47
48
 
48
49
  def get_connector_for_target(solr_target)
49
50
  solr_connector = nil
50
- puts solr_target
51
- puts @solr_targets_configs
52
51
  if @solr_targets_configs.keys.include?(solr_target) then
53
52
  config = @solr_targets_configs[solr_target]
54
53
  solr_connector = RSolr.connect(config)
metadata CHANGED
@@ -1,125 +1,125 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: discovery-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ahmed AlSum
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-19 00:00:00.000000000 Z
11
+ date: 2015-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - '>='
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - '>='
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: stanford-mods
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - '>='
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: retries
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - '>='
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - '>='
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rsolr
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ">="
59
+ - - '>='
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ">="
66
+ - - '>='
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rspec
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - '>='
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - '>='
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: webmock
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ">="
87
+ - - '>='
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ">="
94
+ - - '>='
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: equivalent-xml
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - ">="
101
+ - - '>='
102
102
  - !ruby/object:Gem::Version
103
103
  version: '0'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ">="
108
+ - - '>='
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: vcr
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - ">="
115
+ - - '>='
116
116
  - !ruby/object:Gem::Version
117
117
  version: '0'
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - ">="
122
+ - - '>='
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  description: This library manages the core operations for the discovery indexing such
@@ -131,6 +131,7 @@ extra_rdoc_files: []
131
131
  files:
132
132
  - lib/discovery-indexer.rb
133
133
  - lib/errors.rb
134
+ - lib/logging.rb
134
135
  - lib/mapper/general_mapper.rb
135
136
  - lib/mapper/index_mapper.rb
136
137
  - lib/reader/modsxml.rb
@@ -154,12 +155,12 @@ require_paths:
154
155
  - lib
155
156
  required_ruby_version: !ruby/object:Gem::Requirement
156
157
  requirements:
157
- - - ">="
158
+ - - '>='
158
159
  - !ruby/object:Gem::Version
159
160
  version: '0'
160
161
  required_rubygems_version: !ruby/object:Gem::Requirement
161
162
  requirements:
162
- - - ">="
163
+ - - '>='
163
164
  - !ruby/object:Gem::Version
164
165
  version: '0'
165
166
  requirements: []