djnml 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
# Time.parse is defined by the stdlib 'time' extension, not by core Time.
require 'time'

class DJNML
  # Describes a single story that should be deleted, identified by
  # product, document date, sequence number and publisher.
  #
  # All attributes are optional; an attribute whose argument is missing
  # (nil/false) is left unset and its reader returns nil.
  class Delete
    attr_reader :product, :doc_date, :seq, :publisher, :reason

    # @param args [Hash] symbol-keyed options:
    #   :product   - stored as given
    #   :doc_date  - a String, converted with Time.parse
    #   :seq       - converted with #to_i (e.g. "001579" becomes 1579)
    #   :publisher - stored as given
    #   :reason    - stored as given
    # @raise [ArgumentError] if :doc_date cannot be parsed by Time.parse
    def initialize(args = {})
      @product   = args[:product]              if args[:product]
      @doc_date  = Time.parse(args[:doc_date]) if args[:doc_date]
      @seq       = args[:seq].to_i             if args[:seq]
      @publisher = args[:publisher]            if args[:publisher]
      @reason    = args[:reason]               if args[:reason]
    end
  end
end
@@ -0,0 +1,244 @@
1
+ # Copyright (c) 2012, Tobias Begalke
2
+ # All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without
5
+ # modification, are permitted provided that the following conditions are met:
6
+ # * Redistributions of source code must retain the above copyright
7
+ # notice, this list of conditions and the following disclaimer.
8
+ # * Redistributions in binary form must reproduce the above copyright
9
+ # notice, this list of conditions and the following disclaimer in the
10
+ # documentation and/or other materials provided with the distribution.
11
+ # * Neither the name of the <organization> nor the
12
+ # names of its contributors may be used to endorse or promote products
13
+ # derived from this software without specific prior written permission.
14
+
15
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16
+ # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ # DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
19
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20
+ # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21
+ # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22
+ # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+
26
+
27
# Time.parse is defined by the stdlib 'time' extension, not by core Time.
require 'time'

class DJNML
  # One replacement instruction for a previously published document.
  #
  # An instance can be built in two ways:
  #
  # * from XML: pass the envelope values under the symbol keys :publisher,
  #   :doc_date, :product and :seq, plus a Nokogiri::XML::Element under :xml;
  # * from a plain Hash with string keys ('publisher', 'doc_date', 'product',
  #   'seq', 'mdata', 'headline', 'text', 'summary', 'press_cutout',
  #   'urgency').
  class Modification
    attr_reader :publisher, :doc_date, :product, :seq, :xpath, :mdata,
                :headline, :text, :urgency, :press_cutout, :summary

    # Attributes inspected by #fields_to_modify, in reporting order.
    MODIFIABLE_FIELDS = [:mdata, :headline, :text, :urgency, :press_cutout, :summary].freeze

    # @param args [Hash] see the class comment for the accepted keys.
    #   Symbol-keyed envelope values are always applied first. When :xml is
    #   not a Nokogiri::XML::Element the string-keyed values are read as
    #   well and override the symbol-keyed ones.
    # @raise [ArgumentError] if a given doc_date cannot be parsed by Time.parse
    def initialize(args = {})
      assign_envelope(args[:publisher], args[:doc_date], args[:product], args[:seq])

      xml = args[:xml]
      if xml && xml.is_a?(Nokogiri::XML::Element)
        read_xml(xml)
      else
        read_hash(args)
      end
    end

    # @return [Array<Symbol>] the names of those MODIFIABLE_FIELDS whose
    #   value is set (truthy) on this modification.
    def fields_to_modify
      MODIFIABLE_FIELDS.select { |field| send(field) }
    end

    # A piece of document text kept both as stripped plain text and as the
    # serialized markup of the element's children.
    class XMLText
      attr_reader :text, :html

      # @param data [Nokogiri::XML::Element, Hash] an element whose children
      #   are the content, or a Hash with the string keys 'text' and 'html'.
      #   Any other value leaves both attributes nil.
      def initialize(data)
        case data
        when Hash
          @text = data['text']
          @html = data['html']
        when Nokogiri::XML::Element
          children = data.children
          @text = children.text.strip
          @html = children.to_xml
        end
      end

      # @return [String] the plain text, or "" when no text is set.
      def to_s
        @text.to_s
      end
    end

    # The coding lists found below a djn-mdata element.
    class Mdata
      attr_reader :company_code, :isin_code, :industry_code, :government_code,
                  :page_code, :subject_code, :market_code, :product_code,
                  :geo_code, :stat_code, :journal_code, :routing_code,
                  :content_code, :function_code

      # Codings stored as plain values (stripped strings when read from XML).
      PLAIN_FIELDS = %w[company isin page].freeze

      # Codings wrapped in ::DJNML::Codes objects.
      CODED_FIELDS = %w[industry government subject market product geo
                        stat journal routing content function].freeze

      # @param data [Object]
      # @return [Mdata, nil] a new instance when +data+ is a Hash, else nil.
      def self.from_hash(data)
        new(data) if data.is_a?(Hash)
      end

      # @param data [Nokogiri::XML::Element, Hash, nil] source of the codings;
      #   nil (or any other type) leaves every attribute unset.
      def initialize(data = nil)
        return unless data

        case data
        when Hash                   then initialize_from_hash(data)
        when Nokogiri::XML::Element then initialize_from_xml(data)
        end
      end

      # Populates the codings from a string-keyed Hash ('company_code',
      # 'industry_code', ...). Plain fields are stored as given. Every coded
      # field is expected to be a list of hashes with a 'symbol' entry; a
      # missing list is treated as empty instead of raising.
      #
      # @param data [Hash]
      def initialize_from_hash(data)
        PLAIN_FIELDS.each do |kind|
          instance_variable_set("@#{kind}_code", data["#{kind}_code"])
        end

        CODED_FIELDS.each do |kind|
          codes = Array(data["#{kind}_code"]).map { |entry| ::DJNML::Codes.new(entry['symbol']) }
          instance_variable_set("@#{kind}_code", codes)
        end
      end

      # Populates the codings from the `djn-coding/djn-<kind>/c` elements
      # found by searching +xml+. Each attribute becomes an Array (empty when
      # there are no matching elements).
      #
      # @param xml [Nokogiri::XML::Element]
      def initialize_from_xml(xml)
        PLAIN_FIELDS.each do |kind|
          values = coding_nodes(xml, kind).map { |node| node.text.strip }
          instance_variable_set("@#{kind}_code", values)
        end

        CODED_FIELDS.each do |kind|
          codes = coding_nodes(xml, kind).map { |node| ::DJNML::Codes.new(node.text.strip) }
          instance_variable_set("@#{kind}_code", codes)
        end
      end

      private

      # @return the <c> elements of one coding kind below +xml+.
      def coding_nodes(xml, kind)
        xml.search("djn-coding/djn-#{kind}/c")
      end
    end

    private

    # Stores the four envelope values; nil/false values are skipped so an
    # earlier assignment is never overwritten with nothing.
    def assign_envelope(publisher, doc_date, product, seq)
      @publisher = publisher            if publisher
      @doc_date  = Time.parse(doc_date) if doc_date
      @product   = product              if product
      @seq       = seq.to_i             if seq
    end

    # Reads the replacement content from the XML element. For every tag only
    # the first match is used.
    def read_xml(xml)
      @xpath = xml['xpath']

      node = xml.search('djn-mdata').first
      @mdata = Mdata.new(node) if node

      node = xml.search('headline').first
      @headline = node.text.strip if node

      node = xml.search('text').first
      @text = XMLText.new(node) if node

      node = xml.search('summary').first
      @summary = XMLText.new(node) if node

      node = xml.search('djn-press-cutout').first
      @press_cutout = node.text.strip if node

      node = xml.search('djn-urgency').first
      @urgency = node.text.strip if node
    end

    # Reads envelope and content from the string-keyed entries of +args+.
    def read_hash(args)
      assign_envelope(args['publisher'], args['doc_date'], args['product'], args['seq'])

      @mdata        = Mdata.new(args['mdata'])     if args['mdata']
      @headline     = args['headline']             if args['headline']
      @text         = XMLText.new(args['text'])    if args['text']
      @summary      = XMLText.new(args['summary']) if args['summary']
      @press_cutout = args['press_cutout']         if args['press_cutout']
      @urgency      = args['urgency']              if args['urgency']
    end
  end
end
data/lib/djnml.rb ADDED
@@ -0,0 +1,417 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright (c) 2012, Tobias Begalke
4
+ # All rights reserved.
5
+ #
6
+ # Redistribution and use in source and binary forms, with or without
7
+ # modification, are permitted provided that the following conditions are met:
8
+ # * Redistributions of source code must retain the above copyright
9
+ # notice, this list of conditions and the following disclaimer.
10
+ # * Redistributions in binary form must reproduce the above copyright
11
+ # notice, this list of conditions and the following disclaimer in the
12
+ # documentation and/or other materials provided with the distribution.
13
+ # * Neither the name of the <organization> nor the
14
+ # names of its contributors may be used to endorse or promote products
15
+ # derived from this software without specific prior written permission.
16
+
17
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18
+ # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20
+ # DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22
+ # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23
+ # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24
+ # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
+
28
+
29
require 'date'
require 'time'

require 'nokogiri'
require 'language_detector'

require 'djnml/codes'
require 'djnml/delete'
require 'djnml/modification'
35
+
36
# Reader for a DJNML newswire XML file.
#
# DJNML.load(filename) parses the file and exposes the values it finds as
# read-only attributes. Extraction is best effort: every section of the
# document is read independently, and a section that is missing or
# malformed simply leaves its attributes unset (nil).
class DJNML

  # XPath of the <djn-newswires> element and of the elements below it.
  NEWSWIRES_XPATH = '/doc/djnml/head/docdata/djn/djn-newswires'.freeze
  MDATA_XPATH     = "#{NEWSWIRES_XPATH}/djn-mdata".freeze
  CODING_XPATH    = "#{MDATA_XPATH}/djn-coding".freeze

  # Codings exposed as arrays of stripped strings.
  PLAIN_CODINGS = %w[company isin page].freeze

  # Codings exposed as arrays of Codes objects.
  OBJECT_CODINGS = %w[industry government subject market product geo
                      stat journal routing content function].freeze

  attr_reader :msize, :md5, :sys_id, :destination, :dist_id, :transmission_date,
              :publisher, :doc_date, :product, :seq, :lang,
              :news_source, :origin, :service_id,
              :urgency,
              :brand, :temp_perm, :retention, :hot, :original_source,
              :accession_number, :display_date, :page_citation,
              :company_code, :isin_code, :industry_code, :page_code,
              :government_code, :stat_code, :journal_code, :routing_code,
              :content_code, :function_code, :subject_code, :market_code,
              :product_code, :geo_code,
              :headline, :headline_brand, :text, :html, :language,
              :copyright_year, :copyright_holder,
              :website, :company_name, :company_address, :company_zip, :company_city,
              :delete, :modifications

  # Builds a new instance and loads +filename+ into it.
  #
  # @param filename [String, nil] path of the DJNML file
  # @return [DJNML, nil] the loaded instance, or nil when +filename+ is nil/false
  # @raise [FileError] if the file does not exist
  def self.load(filename)
    return unless filename

    new.load(filename)
  end

  # Parses +filename+ and fills this instance's attributes.
  #
  # @param filename [String] path of the DJNML file
  # @return [DJNML] self
  # @raise [FileError] if the file does not exist
  def load(filename)
    # File.exist? instead of File.exists?: the latter was removed in Ruby 3.2.
    raise FileError, "#{filename}: no such file!" unless File.exist?(filename)

    # Block form closes the handle; File.open (unlike Kernel#open) never
    # interprets a leading "|" in the name as a command to run.
    parser = File.open(filename) { |io| Nokogiri::XML(io) }

    load_envelope(parser)
    load_newswires(parser)
    load_codings(parser)
    load_body(parser)
    load_copyright(parser)
    extract_contact_details
    detect_language
    load_deletions(parser)
    load_modifications(parser)

    self
  end

  # @return [Boolean] true when a body text was loaded (text is not nil).
  def has_content?
    !text.nil?
  end

  # Raised when the file to load does not exist.
  #
  # NOTE(review): inherits from Exception, so a bare `rescue` will not catch
  # it. Kept unchanged so existing callers see the same class hierarchy.
  class FileError < Exception
  end

  private

  # Runs the block and discards any StandardError, so that one unreadable
  # section does not prevent the remaining sections from being loaded.
  # Assignments made before the error are kept.
  def ignoring_errors
    yield
  rescue StandardError
    nil
  end

  # Attributes of the <doc> and <djnml> tags.
  def load_envelope(parser)
    ignoring_errors do
      doc = parser.search('/doc').first
      @msize             = doc['msize'].to_i
      @md5               = doc['md5']
      @sys_id            = doc['sysId']
      @destination       = doc['destination']
      @dist_id           = doc['distId']
      @transmission_date = Time.parse(doc['transmission-date'])
    end

    ignoring_errors do
      djnml = parser.search('/doc/djnml').first
      @publisher = djnml['publisher']
      @doc_date  = Time.parse(djnml['docdate'])
      @product   = djnml['product']
      @seq       = djnml['seq'].to_i
      @lang      = djnml['lang']
    end
  end

  # Attributes of <djn-newswires>, the urgency and the <djn-mdata> attributes.
  def load_newswires(parser)
    ignoring_errors do
      newswires = parser.search(NEWSWIRES_XPATH).first
      @news_source = newswires['news-source']
      @origin      = newswires['origin']
      @service_id  = newswires['service-id']
    end

    ignoring_errors do
      urgency = parser.search("#{NEWSWIRES_XPATH}/djn-urgency").first
      @urgency = urgency.text.strip.squeeze.to_i
    end

    ignoring_errors do
      mdata = parser.search(MDATA_XPATH).first
      @brand            = mdata['brand']
      @temp_perm        = mdata['temp-perm']
      @retention        = mdata['retention']
      @hot              = mdata['hot']
      @original_source  = mdata['original-source']
      @accession_number = mdata['accession-number']
      @page_citation    = mdata['page-citation']
      # Parsed last so the attributes above survive an unparsable date.
      @display_date     = Time.parse(mdata['display-date'])
    end
  end

  # The <c> entries of every djn-coding list, stored in @<kind>_code.
  def load_codings(parser)
    PLAIN_CODINGS.each do |kind|
      ignoring_errors do
        values = coding_tags(parser, kind).map { |tag| tag.text.strip }
        instance_variable_set("@#{kind}_code", values)
      end
    end

    OBJECT_CODINGS.each do |kind|
      ignoring_errors do
        codes = coding_tags(parser, kind).map { |tag| Codes.new(tag.text.strip) }
        instance_variable_set("@#{kind}_code", codes)
      end
    end
  end

  # @return the <c> elements of one coding kind (e.g. "company", "geo").
  def coding_tags(parser, kind)
    parser.search("#{CODING_XPATH}/djn-#{kind}/c")
  end

  # Headline (with its optional brand-display attribute) and body text.
  def load_body(parser)
    ignoring_errors do
      headline = parser.search('/doc/djnml/body/headline').first
      @headline = headline.text.strip
      brand = headline['brand-display']
      @headline_brand = brand if brand
    end

    ignoring_errors do
      content = parser.search('/doc/djnml/body/text').first.children
      @html = content.to_xml
      @text = content.text.strip
    end
  end

  # Year and holder of the <copyright> tag.
  def load_copyright(parser)
    ignoring_errors do
      copyright = parser.search('/doc/djnml/head/copyright').first
      @copyright_year   = copyright['year'].to_s.strip.to_i
      @copyright_holder = copyright['holder']
    end
  end

  # Pulls the website and the company name/address/zip/city out of the body
  # text when it contains "Internet:" / "Company:" lines.
  def extract_contact_details
    ignoring_errors do
      @website = $1.strip if @text =~ /Internet:\s+(.+?)$/
    end

    if @text =~ /Company:\s+(\S.+?)\s*\n+\s+(\b.+?)\n+\s+(\d+)\s+(\b.+?)\n+/
      @company_name    = $1.strip
      @company_address = $2.strip
      @company_zip     = $3.strip
      @company_city    = $4.strip
    end
  end

  # Stores whatever LanguageDetector reports for the body text.
  def detect_language
    ignoring_errors do
      @language = LanguageDetector.instance.detect(@text)
    end
  end

  # One Delete per <doc-delete> element.
  def load_deletions(parser)
    ignoring_errors do
      @delete = []
      parser.search('/doc/djnml/administration/doc-delete').each do |dd|
        @delete << Delete.new(:product   => dd['product'],
                              :doc_date  => dd['docdate'],
                              :seq       => dd['seq'],
                              :publisher => dd['publisher'],
                              :reason    => dd['reason'])
      end
    end
  end

  # One Modification per <modify-replace> element, each carrying the
  # attributes of the enclosing <doc-modify>. Errors raised here are not
  # rescued and propagate to the caller of #load.
  def load_modifications(parser)
    @modifications = []
    doc_modify = parser.search('/doc/djnml/administration/doc-modify').first

    parser.search('/doc/djnml/administration/doc-modify/modify-replace').each do |replacement|
      @modifications << Modification.new(:doc_date  => doc_modify['docdate'],
                                         :product   => doc_modify['product'],
                                         :publisher => doc_modify['publisher'],
                                         :seq       => doc_modify['seq'],
                                         :xml       => replacement)
    end
  end
end
@@ -0,0 +1,28 @@
1
+ <?xml version="1.0" encoding="ISO-8859-1"?>
2
+ <!DOCTYPE doc SYSTEM "djnml-1.0b.dtd">
3
+ <doc msize="000001968" md5="88d754f61ba4361c72a6a6dd0d2d00d5" sysId="sbknwsdcmn4p1" destination="AW" distId="NHP1" transmission-date="20120716T135050Z" >
4
+ <djnml publisher="DJN" docdate="20120713" product="LL" seq="587" xml:lang="en-us" >
5
+ <administration>
6
+ <doc-delete product="LL" docdate="20110608" seq="001579" publisher="DJN" reason="expire" />
7
+ <doc-delete product="LL" docdate="20110608" seq="001580" publisher="DJN" reason="expire" />
8
+ <doc-delete product="LL" docdate="20110608" seq="001581" publisher="DJN" reason="expire" />
9
+ <doc-delete product="LL" docdate="20110608" seq="001582" publisher="DJN" reason="expire" />
10
+ <doc-delete product="LL" docdate="20110608" seq="001583" publisher="DJN" reason="expire" />
11
+ <doc-delete product="LL" docdate="20110608" seq="001584" publisher="DJN" reason="expire" />
12
+ <doc-delete product="LL" docdate="20110608" seq="001585" publisher="DJN" reason="expire" />
13
+ <doc-delete product="LL" docdate="20110608" seq="001586" publisher="DJN" reason="expire" />
14
+ <doc-delete product="LL" docdate="20110608" seq="001587" publisher="DJN" reason="expire" />
15
+ <doc-delete product="LL" docdate="20110608" seq="001588" publisher="DJN" reason="expire" />
16
+ <doc-delete product="LL" docdate="20110608" seq="001589" publisher="DJN" reason="expire" />
17
+ <doc-delete product="LL" docdate="20110608" seq="001590" publisher="DJN" reason="expire" />
18
+ <doc-delete product="LL" docdate="20110608" seq="001591" publisher="DJN" reason="expire" />
19
+ <doc-delete product="LL" docdate="20110608" seq="001592" publisher="DJN" reason="expire" />
20
+ <doc-delete product="LL" docdate="20110608" seq="001593" publisher="DJN" reason="expire" />
21
+ <doc-delete product="LL" docdate="20110608" seq="001594" publisher="DJN" reason="expire" />
22
+ <doc-delete product="LL" docdate="20110608" seq="001595" publisher="DJN" reason="expire" />
23
+ <doc-delete product="LL" docdate="20110608" seq="001596" publisher="DJN" reason="expire" />
24
+ <doc-delete product="LL" docdate="20110608" seq="001597" publisher="DJN" reason="expire" />
25
+ <doc-delete product="LL" docdate="20110608" seq="001598" publisher="DJN" reason="expire" />
26
+ </administration>
27
+ </djnml>
28
+ </doc>