dti_nitf 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +14 -1
- data/VERSION +1 -1
- data/dti_nitf.gemspec +6 -4
- data/lib/dti_nitf.rb +2 -2
- data/lib/dti_nitf/story.rb +19 -7
- data/test/test_dti_nitf_correx.rb +55 -0
- metadata +4 -2
data/README.rdoc
CHANGED
@@ -2,6 +2,19 @@
|
|
2
2
|
|
3
3
|
This library helps you process the "XML" (quotes are intentional) from DTI's export software into valid NITF documents or Story & Media objects.
|
4
4
|
|
5
|
+
== usage
|
6
|
+
gem 'dti_nitf'
|
7
|
+
require 'dti_nitf'
|
8
|
+
|
9
|
+
==example
|
10
|
+
story = DTI::Story.new(File.read('/tmp/02/AIRPORT 021010.xml'))
|
11
|
+
=> #<DTI::Story:0x10207d598
|
12
|
+
story.hl1
|
13
|
+
=> "Master plan to focus on attracting new tenants"
|
14
|
+
story.pub_date
|
15
|
+
=> "2010-02-10"
|
16
|
+
|
17
|
+
|
5
18
|
== Note on Patches/Pull Requests
|
6
19
|
|
7
20
|
* Fork the project.
|
@@ -14,4 +27,4 @@ This library helps you processes the "XML" (quotes are intentional) from DTI's e
|
|
14
27
|
|
15
28
|
== Copyright
|
16
29
|
|
17
|
-
Copyright (c) 2010 Mark Turner. See LICENSE for details.
|
30
|
+
Copyright (c) 2010 Mark Turner & Western Communications Inc. See LICENSE for details.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.2.0
|
data/dti_nitf.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{dti_nitf}
|
8
|
-
s.version = "0.1.0"
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Mark Turner"]
|
12
|
-
s.date = %q{2010-02-
|
12
|
+
s.date = %q{2010-02-23}
|
13
13
|
s.description = %q{Helps you processes the 'XML' (Quotes intentional) from DTI's XML export into valid NITF documents or Story & Media objects}
|
14
14
|
s.email = %q{mark@amerine.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -29,7 +29,8 @@ Gem::Specification.new do |s|
|
|
29
29
|
"lib/dti_nitf/nitf.rb",
|
30
30
|
"lib/dti_nitf/story.rb",
|
31
31
|
"test/helper.rb",
|
32
|
-
"test/test_dti_nitf.rb"
|
32
|
+
"test/test_dti_nitf.rb",
|
33
|
+
"test/test_dti_nitf_correx.rb"
|
33
34
|
]
|
34
35
|
s.homepage = %q{http://github.com/amerine/dti_nitf}
|
35
36
|
s.rdoc_options = ["--charset=UTF-8"]
|
@@ -38,7 +39,8 @@ Gem::Specification.new do |s|
|
|
38
39
|
s.summary = %q{Process DTI's XML export into valid NITF documents or Story & Media objects}
|
39
40
|
s.test_files = [
|
40
41
|
"test/helper.rb",
|
41
|
-
"test/test_dti_nitf.rb"
|
42
|
+
"test/test_dti_nitf.rb",
|
43
|
+
"test/test_dti_nitf_correx.rb"
|
42
44
|
]
|
43
45
|
|
44
46
|
if s.respond_to? :specification_version then
|
data/lib/dti_nitf.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
dir = File.dirname(__FILE__)
|
2
|
-
%w[rubygems
|
2
|
+
%w[rubygems crack].each { |x| require x}
|
3
3
|
require File.join(dir, 'dti_nitf/nitf')
|
4
4
|
require File.join(dir, 'dti_nitf/story')
|
5
5
|
|
6
|
-
#file_contents = File.read('/tmp/02/
|
6
|
+
#file_contents = File.read('/tmp/02/CORREX 021410.xml')
|
7
7
|
#story = DTI::Story.new(file_contents)
|
8
8
|
|
9
9
|
#pp story
|
data/lib/dti_nitf/story.rb
CHANGED
@@ -3,13 +3,21 @@ module DTI
|
|
3
3
|
attr_accessor :raw_xml, :doc_id, :copyright_holder, :doc_name
|
4
4
|
attr_accessor :publication, :section, :pub_date, :page
|
5
5
|
attr_accessor :body, :byline, :paper, :hl1, :hl2, :tagline
|
6
|
+
attr_accessor :correction, :original_story_id
|
6
7
|
|
7
8
|
def initialize(xml)
|
8
9
|
self.raw_xml = DTI::NITF.parse(xml)
|
10
|
+
self.correction = false
|
9
11
|
|
10
12
|
cracked = Crack::XML.parse(self.raw_xml)
|
11
13
|
#pp cracked
|
12
14
|
|
15
|
+
if cracked["nitf"]['head']['original_storyid']
|
16
|
+
self.correction = true
|
17
|
+
self.original_story_id = cracked["nitf"]['head']['original_storyid'].to_i
|
18
|
+
self.hl1 = "Correction"
|
19
|
+
end
|
20
|
+
|
13
21
|
doc_data = cracked["nitf"]["head"]["docdata"]
|
14
22
|
pub_data = cracked["nitf"]["head"]["pubdata"]
|
15
23
|
doc_body = cracked["nitf"]["body"]
|
@@ -26,15 +34,19 @@ module DTI
|
|
26
34
|
self.body = join_hash(doc_body["body.content"]["p"])
|
27
35
|
self.body = fix_quotes(self.body)
|
28
36
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
37
|
+
if !self.correction?
|
38
|
+
self.byline = doc_body["body.head"]["byline"]["person"].gsub!(/^By\s/, '').rstrip!
|
39
|
+
self.paper = doc_body["body.head"]["byline"]["byttl"].rstrip!
|
40
|
+
self.hl1 = doc_body["body.head"]["hedline"]["hl1"].to_s.rstrip
|
41
|
+
self.hl2 = doc_body["body.head"]["hedline"]["hl2"].to_s.lstrip.rstrip
|
42
|
+
self.tagline = doc_body["body.end"]["tagline"].to_s.lstrip.rstrip
|
43
|
+
end
|
36
44
|
end
|
37
45
|
|
46
|
+
def correction?
|
47
|
+
self.correction
|
48
|
+
end
|
49
|
+
|
38
50
|
def join_hash(hash)
|
39
51
|
hash_string =""
|
40
52
|
hash.each { |h|
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestDtiNitfCorrex < Test::Unit::TestCase
|
4
|
+
@@xml = <<-EOT
|
5
|
+
<?xml-stylesheet type="text/css" href="bulletin.css"?>
|
6
|
+
<!DOCTYPE nitf SYSTEM "nitf-3-2.dtd">
|
7
|
+
<nitf>
|
8
|
+
<head>
|
9
|
+
|
10
|
+
<docdata>
|
11
|
+
<doc.copyright holder="Western Communications, Inc."/>
|
12
|
+
<doc-id id-string="14022204"/>
|
13
|
+
<doc-name name-string="CORREX 021410"/>
|
14
|
+
</docdata>
|
15
|
+
<pubdata name="The Bulletin " position.sequence="1 " position.section="A " date.publication="2010-02-14 "/>
|
16
|
+
<original-storyid>14015998</original-storyid>
|
17
|
+
</head>
|
18
|
+
<body>
|
19
|
+
<body.head>
|
20
|
+
<hedline>
|
21
|
+
<hl1></hl1><hl2></hl2>
|
22
|
+
</hedline>
|
23
|
+
<byline>
|
24
|
+
</byline>
|
25
|
+
</body.head>
|
26
|
+
<body.content>
|
27
|
+
<hl2><p> Correction </p></hl2><p> In a column titled <!-- 201c(unknown) -->The power and joy of libraries,<!-- 201d(unknown) --> which appears today on Page F1, Deschutes Public Library Director Todd Dunkelberg<!-- 2019(unknown) -->s name is incorrect. </p><p> The Bulletin regrets the error.</p>
|
28
|
+
|
29
|
+
|
30
|
+
</body.content>
|
31
|
+
<body.end>
|
32
|
+
</body.end>
|
33
|
+
</body>
|
34
|
+
</nitf>
|
35
|
+
EOT
|
36
|
+
|
37
|
+
context "A Story Correction" do
|
38
|
+
setup do
|
39
|
+
@story = DTI::Story.new(@@xml)
|
40
|
+
end
|
41
|
+
|
42
|
+
should "have an orginal story id" do
|
43
|
+
assert @story.correction?
|
44
|
+
assert @story.original_story_id == 14015998
|
45
|
+
end
|
46
|
+
|
47
|
+
should "have a body" do
|
48
|
+
assert @story.body == "<p> In a column titled \"The power and joy of libraries,\" which appears today on Page F1, Deschutes Public Library Director Todd Dunkelberg’s name is incorrect. </p><p> The Bulletin regrets the error.</p>"
|
49
|
+
end
|
50
|
+
|
51
|
+
should "have a hl1" do
|
52
|
+
assert @story.hl1 == "Correction"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dti_nitf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mark Turner
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-23 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -55,6 +55,7 @@ files:
|
|
55
55
|
- lib/dti_nitf/story.rb
|
56
56
|
- test/helper.rb
|
57
57
|
- test/test_dti_nitf.rb
|
58
|
+
- test/test_dti_nitf_correx.rb
|
58
59
|
has_rdoc: true
|
59
60
|
homepage: http://github.com/amerine/dti_nitf
|
60
61
|
licenses: []
|
@@ -86,3 +87,4 @@ summary: Process DTI's XML export into valid NITF documents or Story & Media obj
|
|
86
87
|
test_files:
|
87
88
|
- test/helper.rb
|
88
89
|
- test/test_dti_nitf.rb
|
90
|
+
- test/test_dti_nitf_correx.rb
|