discovery-indexer 0.1 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 073a6699afc6ef96ee080e00be2dfe3180bfb734
4
- data.tar.gz: e4361f9aad38081a92598e53fb0f55e564807812
3
+ metadata.gz: f852ef4d15e2ba4dc67d49b1a0ec599ce5791234
4
+ data.tar.gz: 781d104e85d4b7ce490b639ca29b5aa31e03ef7e
5
5
  SHA512:
6
- metadata.gz: 29e0c0dd830b66cc7cfe61bd4da7a42d3ee6fbb55721bf01ecf1f4b1679585d7aae9cc7c6f2a35d77b80b0017b45babea77516b9a4373191b7c8899b0a9a371c
7
- data.tar.gz: 09c61dd4fa8501865bc4081dcb584224de912921eae74bb34121d6aafd05f4a762f45cdaffdf2c0a0282d318bc501629dcb0f1228c48d1fcba9ca769d4ab07db
6
+ metadata.gz: 12bc9321557bc08c608763546a185404d47fe4d8225df32604378f864acde9ceb93f8df48c63e755ffe6fb236b7e5a60ea15453ce2e07ba18f213e37f8249d8a
7
+ data.tar.gz: 91402727cff066c16adf1f29a8b9a156dc2c175de887760b2e343fe49caed6ab97f6c8bfd190b034bf8add13095c60553f71eda75b5890b2c96bddfc82682c7a
@@ -1,3 +1,6 @@
1
+ require 'errors'
2
+ require 'logging'
3
+
1
4
  require 'reader/purlxml'
2
5
  require 'reader/purlxml_reader'
3
6
  require 'reader/purlxml_parser'
@@ -15,7 +18,6 @@ require 'writer/solr_writer'
15
18
 
16
19
  #require 'utilities/extract_sub_targets'
17
20
 
18
- require 'errors'
19
21
 
20
22
  module DiscoveryIndexer
21
23
  PURL_DEFAULT = 'http://purl-test.stanford.edu'
data/lib/logging.rb ADDED
@@ -0,0 +1,16 @@
1
+ require 'logger'
2
+
3
+ module DiscoveryIndexer
4
+ module Logging
5
+ class << self
6
+ def logger
7
+ @logger ||= Logger.new(STDOUT)
8
+ end
9
+
10
+ def logger=(logger)
11
+ @logger = logger
12
+ end
13
+ end
14
+
15
+ end
16
+ end
@@ -5,9 +5,9 @@ module DiscoveryIndexer
5
5
  # This class is the main class to access and parse the mods xml
6
6
  # as retrieved from PURL server
7
7
  # @example to run the code
8
- # druid = "aa111aa1111"
9
- # p = DiscoveryIndexer::InputXml::Modsxml.new(druid)
10
- # model = p.load()
8
+ # druid = "aa111aa1111"
9
+ # p = DiscoveryIndexer::InputXml::Modsxml.new(druid)
10
+ # model = p.load()
11
11
  #
12
12
  #
13
13
  class Modsxml
@@ -4,9 +4,9 @@ module DiscoveryIndexer
4
4
  # This class is the main class to access and parse the purl xml
5
5
  # as retrieved from PURL server
6
6
  # @example to run the code
7
- # druid = "aa111aa1111"
8
- # p = DiscoveryIndexer::InputXml::Purlxml.new(druid)
9
- # model = p.load()
7
+ # druid = "aa111aa1111"
8
+ # p = DiscoveryIndexer::InputXml::Purlxml.new(druid)
9
+ # model = p.load()
10
10
  #
11
11
  class Purlxml
12
12
 
@@ -1,21 +1,75 @@
1
1
  module DiscoveryIndexer
2
2
  module InputXml
3
3
  class PurlxmlModel
4
+
5
+ #@!attribute [rw] public_xml
6
+ # @return [Nokogiri::XML] The public xml as retrieved from purl server
4
7
  attr_accessor :public_xml
8
+
9
+ #@!attribute [rw] content_metadata
10
+ # @return [Nokogiri::XML] The content_metadata as extracted from public xml
5
11
  attr_accessor :content_metadata
12
+
13
+ #@!attribute [rw] identity_metadata
14
+ # @return [Nokogiri::XML] The identity_metadata as extracted from public xml
6
15
  attr_accessor :identity_metadata
16
+
17
+ #@!attribute [rw] rights_metadata
18
+ # @return [Nokogiri::XML] The rights_metadata as extracted from public xml
7
19
  attr_accessor :rights_metadata
20
+
21
+ #@!attribute [rw] dc
22
+ # @return [Nokogiri::XML] The dc element as extracted from public xml
8
23
  attr_accessor :dc
24
+
25
+ #@!attribute [rw] rdf
26
+ # @return [Nokogiri::XML] The rdf element as extracted from public xml
9
27
  attr_accessor :rdf
28
+
29
+ # @!attribute [rw] release_tags_hash
30
+ # @return [Hash] The release_tag in hash format as extracted from public xml
31
+ # identity_metadata.
32
+ # @example
33
+ # !{"target1"=>true, "target2"=>false}
10
34
  attr_accessor :release_tags_hash
35
+
36
+ # @!attribute [rw] dor_content_type
37
+ # @return [String] The dor_content_type as extracted from public xml
38
+ # content_metadata.
11
39
  attr_accessor :dor_content_type
40
+
41
+ # @!attribute [rw] is_collection
42
+ # @return [Boolean] true if the item type is collection in the identity_metadata
12
43
  attr_accessor :is_collection
44
+
45
+ # @!attribute [rw] collection_druids
46
+ # @return [Array] a list of the collections that this druid belongs to
47
+ # @example
48
+ # ["aa11aaa1111","bb111bb1111"]
13
49
  attr_accessor :collection_druids
14
- attr_accessor :dor_content_type
50
+
51
+ # @!attribute [rw] file_ids
52
+ # @return [Array] a list of the file ids in the content_metadata
53
+ # @example
54
+ # ["pc0065_b08_f10_i031.txt","pc0065_b08_f10_i032.txt"]
15
55
  attr_accessor :file_ids
56
+
57
+ # @!attribute [rw] image_ids
58
+ # @return [Array] a list of the image ids in the content_metadata
59
+ # @example
60
+ # ["pc0065_b08_f10_i031.jp2","pc0065_b08_f10_i032.jp2"]
16
61
  attr_accessor :image_ids
62
+
63
+ # @!attribute [rw] catkey
64
+ # @return [String] the catkey attribute in identity_metadata
17
65
  attr_accessor :catkey
66
+
67
+ # @!attribute [rw] barcode
68
+ # @return [String] the barcode attribute in identity_metadata
18
69
  attr_accessor :barcode
70
+
71
+ # @!attribute [rw] label
72
+ # @return [String] the objectLabel attribute in identity_metadata
19
73
  attr_accessor :label
20
74
 
21
75
  end
@@ -34,7 +34,7 @@ module DiscoveryIndexer
34
34
 
35
35
  # extracts the identityMetadata for this fedora object, from the purl xml
36
36
  # @return [Nokogiri::XML::Document] the identityMetadata for the fedora object
37
- # @raise [DiscoveryIndexer::Errors::MissingIdentityMetadata] if there is no contentMetadata
37
+ # @raise [DiscoveryIndexer::Errors::MissingIdentityMetadata] if there is no identity_metadata
38
38
  def parse_identity_metadata
39
39
  begin
40
40
  ng_doc = Nokogiri::XML(@purlxml_ng_doc.root.xpath('/publicObject/identityMetadata').to_xml)
@@ -113,13 +113,9 @@ module DiscoveryIndexer
113
113
  # @return [Nokogiri::XML::Document] the contentMetadata for the fedora object
114
114
  # @raise [DiscoveryIndexer::Errors::MissingContentMetadata] if there is no contentMetadata
115
115
  def parse_content_metadata
116
- # begin
117
116
  ng_doc = Nokogiri::XML(@purlxml_ng_doc.root.xpath('/publicObject/contentMetadata').to_xml)
118
- # raise DiscoveryIndexer::Errors::MissingContentMetadata.new(@purlxml_ng_doc.inspect) if !ng_doc || ng_doc.children.empty?
117
+ ng_doc = nil if !ng_doc || ng_doc.children.empty?
119
118
  ng_doc
120
- # rescue
121
- # raise DiscoveryIndexer::Errors::MissingContentMetadata.new(@purlxml_ng_doc.inspect)
122
- # end
123
119
  end
124
120
 
125
121
  # @return true if the identityMetadata has <objectType>collection</objectType>, false otherwise
@@ -153,18 +149,18 @@ module DiscoveryIndexer
153
149
  # @return [String]
154
150
  def parse_dor_content_type
155
151
  content_md = parse_content_metadata
156
- dct = content_md ? content_md.xpath('@type').text : nil
152
+ dct = content_md ? content_md.xpath('contentMetadata/@type').text : nil
157
153
  puts " has no DOR content type (<contentMetadata> element may be missing type attribute)" if !dct || dct.empty?
158
154
  dct
159
155
  end
160
156
 
161
- # the @id attribute of resource/file elements that match the display_type, including extension
157
+ # the @id attribute of resource/file elements that match the image type, including extension
162
158
  # @return [Array<String>] filenames
163
159
  def parse_image_ids
164
160
  ids = []
165
161
  content_md = parse_content_metadata
166
162
  unless content_md.nil?
167
- content_md.xpath('./resource[@type="image"]/file/@id').each { |node|
163
+ content_md.xpath('//resource[@type="image"]/file/@id').each { |node|
168
164
  ids << node.text if !node.text.empty?
169
165
  }
170
166
  return nil if ids.empty?
@@ -172,11 +168,13 @@ module DiscoveryIndexer
172
168
  end
173
169
  end
174
170
 
171
+ # the @id attribute of resource/file elements, including extension
172
+ # @return [Array<String>] filenames
175
173
  def parse_file_ids
176
174
  ids = []
177
175
  content_md = parse_content_metadata
178
176
  unless content_md.nil?
179
- content_md.xpath('./resource/file/@id').each { |node|
177
+ content_md.xpath('//resource/file/@id').each { |node|
180
178
  ids << node.text if !node.text.empty?
181
179
  }
182
180
  return nil if ids.empty?
@@ -184,6 +182,7 @@ module DiscoveryIndexer
184
182
  end
185
183
  end
186
184
 
185
+ # @return catkey value from the DOR identity_metadata, or nil if there is no catkey
187
186
  def parse_catkey
188
187
  catkey = nil
189
188
  node = @purlxml_ng_doc.xpath("/publicObject/identityMetadata/otherId[@name='catkey']")
@@ -191,6 +190,7 @@ module DiscoveryIndexer
191
190
  return catkey
192
191
  end
193
192
 
193
+ # @return barcode value from the DOR identity_metadata, or nil if there is no barcode
194
194
  def parse_barcode
195
195
  barcode = nil
196
196
  node = @purlxml_ng_doc.xpath("/publicObject/identityMetadata/otherId[@name='barcode']")
@@ -198,6 +198,7 @@ module DiscoveryIndexer
198
198
  return barcode
199
199
  end
200
200
 
201
+ # @return objectLabel value from the DOR identity_metadata, or nil if there is no objectLabel
201
202
  def parse_label
202
203
  label = nil
203
204
  node = @purlxml_ng_doc.xpath("/publicObject/identityMetadata/objectLabel")
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module DiscoveryIndexer
2
- VERSION = '0.1'
2
+ VERSION = '0.2.1'
3
3
  end
@@ -4,7 +4,8 @@ require 'rsolr'
4
4
  module DiscoveryIndexer
5
5
  module Writer
6
6
  class SolrClient
7
-
7
+ include DiscoveryIndexer::Logging
8
+
8
9
  # Add the document to solr, retry if an error occurs.
9
10
  # See https://github.com/ooyala/retries for docs on with_retries.
10
11
  # @param [Hash] solr_doc a Hash representation of the solr document
@@ -24,22 +25,21 @@ module DiscoveryIndexer
24
25
  end
25
26
 
26
27
  def self.process(solr_doc, solr_connector, max_retries, is_delete=false)
27
- logger = Logger.new STDOUT
28
- id = solr_doc[:id]
28
+ id = solr_doc[:id]
29
29
  puts id
30
30
  handler = Proc.new do |exception, attempt_number, total_delay|
31
- logger.debug "#{exception.class} on attempt #{attempt_number} for #{id}"
31
+ DiscoveryIndexer::Logging.logger.debug "#{exception.class} on attempt #{attempt_number} for #{id}"
32
32
  end
33
33
 
34
34
  with_retries(:max_tries => max_retries, :handler => handler, :base_sleep_seconds => 1, :max_sleep_seconds => 5) do |attempt|
35
- logger.debug "Attempt #{attempt} for #{id}"
35
+ DiscoveryIndexer::Logging.logger.debug "Attempt #{attempt} for #{id}"
36
36
 
37
37
  if is_delete
38
38
  solr_connector.delete_by_id(id)
39
- logger.info "Successfully deleted #{id} on attempt #{attempt}"
39
+ DiscoveryIndexer::Logging.logger.info "Successfully deleted #{id} on attempt #{attempt}"
40
40
  else
41
41
  solr_connector.add(solr_doc)
42
- logger.info "Successfully indexed #{id} on attempt #{attempt}"
42
+ DiscoveryIndexer::Logging.logger.info "Successfully indexed #{id} on attempt #{attempt}"
43
43
  end
44
44
 
45
45
  end
@@ -4,6 +4,7 @@ require 'rsolr'
4
4
  module DiscoveryIndexer
5
5
  module Writer
6
6
  class SolrWriter
7
+ include DiscoveryIndexer::Logging
7
8
 
8
9
  def process(druid, index_doc, targets, solr_targets_configs)
9
10
  @solr_targets_configs = solr_targets_configs
@@ -47,8 +48,6 @@ module DiscoveryIndexer
47
48
 
48
49
  def get_connector_for_target(solr_target)
49
50
  solr_connector = nil
50
- puts solr_target
51
- puts @solr_targets_configs
52
51
  if @solr_targets_configs.keys.include?(solr_target) then
53
52
  config = @solr_targets_configs[solr_target]
54
53
  solr_connector = RSolr.connect(config)
metadata CHANGED
@@ -1,125 +1,125 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: discovery-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ahmed AlSum
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-19 00:00:00.000000000 Z
11
+ date: 2015-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - '>='
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - '>='
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: stanford-mods
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - '>='
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: retries
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - '>='
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - '>='
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rsolr
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ">="
59
+ - - '>='
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ">="
66
+ - - '>='
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rspec
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - '>='
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - '>='
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: webmock
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ">="
87
+ - - '>='
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ">="
94
+ - - '>='
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: equivalent-xml
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - ">="
101
+ - - '>='
102
102
  - !ruby/object:Gem::Version
103
103
  version: '0'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ">="
108
+ - - '>='
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: vcr
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - ">="
115
+ - - '>='
116
116
  - !ruby/object:Gem::Version
117
117
  version: '0'
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - ">="
122
+ - - '>='
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  description: This library manages the core operations for the discovery indexing such
@@ -131,6 +131,7 @@ extra_rdoc_files: []
131
131
  files:
132
132
  - lib/discovery-indexer.rb
133
133
  - lib/errors.rb
134
+ - lib/logging.rb
134
135
  - lib/mapper/general_mapper.rb
135
136
  - lib/mapper/index_mapper.rb
136
137
  - lib/reader/modsxml.rb
@@ -154,12 +155,12 @@ require_paths:
154
155
  - lib
155
156
  required_ruby_version: !ruby/object:Gem::Requirement
156
157
  requirements:
157
- - - ">="
158
+ - - '>='
158
159
  - !ruby/object:Gem::Version
159
160
  version: '0'
160
161
  required_rubygems_version: !ruby/object:Gem::Requirement
161
162
  requirements:
162
- - - ">="
163
+ - - '>='
163
164
  - !ruby/object:Gem::Version
164
165
  version: '0'
165
166
  requirements: []