stanford-mods 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 77f36520f38dfd83cd8cc79480fc5fbda45b2df6
4
- data.tar.gz: b1e7a4db9be39cdbeeca3124a62e3c0403ba31a2
3
+ metadata.gz: fda4f5ab13e13c81db05b27fd3ca5e5e17be0e1d
4
+ data.tar.gz: 13a2ee552756a2e700ee102ada1b56b55d5cda82
5
5
  SHA512:
6
- metadata.gz: fd0ab891b9f578611b928231dcf45debcffc209068032fdbcd98d93ada56e95fc5afa82be97dfc163287297e190ff1ef258abef27dcd58170ec18152381da398
7
- data.tar.gz: f604d45d319fbce30215ff436ad62ebc23a0bab11d995d6599bef137dae9a5465344d69d2492d84b29436711106ae7e80ce9090cb3005c3be4fe1fb004b58b7e
6
+ metadata.gz: 63c3efbf88ea9d76b80182c490cb01c3d6d404858ce7d91f46b1d98c3df284cf8dad19cf6b55eb2185a1779bafd8c750dde4e04cf39e64f6c810cf030cc80359
7
+ data.tar.gz: 275a160cf3603693e5801f0d5ff530995463c00a5f77625391fc09efaca8b13f0621f5a2e5034de5e1ae9cbb27d7a59fa0dcaf53aac877d057e65e004ecc3627
data/lib/stanford-mods.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'mods'
2
2
  require 'stanford-mods/date_parsing'
3
+ require 'stanford-mods/coordinate'
3
4
  require 'stanford-mods/geo_spatial'
4
5
  require 'stanford-mods/name'
5
6
  require 'stanford-mods/origin_info'
@@ -0,0 +1,81 @@
1
+ # encoding: utf-8
2
+ module Stanford
3
+ module Mods
4
+ ##
5
+ # Geospatial coordinate parsing
6
+ class Coordinate
7
+ attr_reader :value
8
+
9
+ def initialize(value)
10
+ @value = value
11
+ end
12
+
13
+ # Convert the coordinate to a WKT/CQL ENVELOPE representation
14
+ def as_envelope
15
+ return unless valid?
16
+
17
+ "ENVELOPE(#{bounds[:min_x]}, #{bounds[:max_x]}, #{bounds[:min_y]}, #{bounds[:max_y]})"
18
+ end
19
+
20
+ # Convert the coordinate to a Solr 4.x bbox-format representation
21
+ def as_bbox
22
+ return unless valid?
23
+
24
+ "#{bounds[:min_x]} #{bounds[:min_y]} #{bounds[:max_x]} #{bounds[:max_y]}"
25
+ end
26
+
27
+ def valid?
28
+ return false if bounds.empty?
29
+
30
+ range_x = -180.0..180.0
31
+ range_y = -90.0..90.0
32
+
33
+ range_x.include?(bounds[:min_x]) &&
34
+ range_x.include?(bounds[:max_x]) &&
35
+ range_y.include?(bounds[:min_y]) &&
36
+ range_y.include?(bounds[:max_y])
37
+ end
38
+
39
+ private
40
+
41
+ def bounds
42
+ @bounds ||= begin
43
+ matches = coord.match %r{\A(?<lat>[EW].+-+.+)\s*/\s*(?<lng>[NS].+-+.+)\Z}
44
+
45
+ if matches
46
+ min_x, max_x = matches['lat'].split(/-+/).map { |x| coord_to_decimal(x) }.minmax
47
+ min_y, max_y = matches['lng'].split(/-+/).map { |y| coord_to_decimal(y) }.minmax
48
+
49
+ { min_x: min_x, min_y: min_y, max_x: max_x, max_y: max_y }
50
+ else
51
+ {}
52
+ end
53
+ end
54
+ end
55
+
56
+ def coord
57
+ matches = value.match(/^\(?([^)]+)\)?\.?$/)
58
+
59
+ if matches
60
+ matches[1]
61
+ else
62
+ value
63
+ end
64
+ end
65
+
66
+ def coord_to_decimal(point)
67
+ regex = /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/
68
+ match = regex.match(point)
69
+
70
+ return Float::INFINITY unless match
71
+
72
+ dec = match['deg'].to_i
73
+ dec += match['min'].to_f / 60
74
+ dec += match['sec'].to_f / 60 / 60
75
+ dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
76
+
77
+ dec
78
+ end
79
+ end
80
+ end
81
+ end
@@ -10,50 +10,23 @@ module Stanford
10
10
  Array(@mods_ng_xml.subject.cartographics.coordinates).map(&:text)
11
11
  end
12
12
 
13
- def point_bbox
13
+ def coordinates_as_envelope
14
14
  coordinates.map do |n|
15
- matches = n.match(/^\(?([^)]+)\)?\.?$/)
15
+ c = Stanford::Mods::Coordinate.new(n)
16
16
 
17
- if matches
18
- coord_to_bbox(matches[1])
19
- else
20
- coord_to_bbox(n)
21
- end
17
+ c.as_envelope if c.valid?
22
18
  end.compact
23
19
  end
24
20
 
25
- private
26
-
27
- def coord_to_bbox(coord)
28
- matches = coord.match %r{\A(?<lat>[EW].+-+.+)\s*/\s*(?<lng>[NS].+-+.+)\Z}
29
- return unless matches
30
-
31
- min_x, max_x = matches['lat'].split(/-+/).map { |x| coord_to_decimal(x) }.minmax
32
- min_y, max_y = matches['lng'].split(/-+/).map { |y| coord_to_decimal(y) }.minmax
33
-
34
- "#{min_x} #{min_y} #{max_x} #{max_y}" if valid_bbox?(min_x, max_x, min_y, max_y)
35
- end
36
-
37
- def coord_to_decimal(point)
38
- regex = /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/
39
- match = regex.match(point)
40
-
41
- return Float::INFINITY unless match
42
-
43
- dec = match['deg'].to_i
44
- dec += match['min'].to_f / 60
45
- dec += match['sec'].to_f / 60 / 60
46
- dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
21
+ def coordinates_as_bbox
22
+ coordinates.map do |n|
23
+ c = Stanford::Mods::Coordinate.new(n)
47
24
 
48
- dec
25
+ c.as_bbox if c.valid?
26
+ end.compact
49
27
  end
50
28
 
51
- def valid_bbox?(min_x, max_x, min_y, max_y)
52
- range_x = -180.0..180.0
53
- range_y = -90.0..90.0
54
-
55
- range_x.include?(min_x) && range_x.include?(max_x) && range_y.include?(min_y) && range_y.include?(max_y)
56
- end
29
+ alias point_bbox coordinates_as_bbox
57
30
  end # class Record
58
31
  end # Module Mods
59
32
  end # Module Stanford
@@ -1,6 +1,6 @@
1
1
  module Stanford
2
2
  module Mods
3
3
  # this is the Ruby Gem version
4
- VERSION = "1.4.0"
4
+ VERSION = "1.5.0"
5
5
  end
6
6
  end
@@ -53,7 +53,7 @@ describe "Cartographic coordinates" do
53
53
  end
54
54
  end
55
55
 
56
- context "point_bbox" do
56
+ describe "#coordinates_as_bbox" do
57
57
  it "returns empty array if no coordinates in the mods" do
58
58
  smods_rec.from_str(no_coord)
59
59
  expect(smods_rec.point_bbox).to eq([])
@@ -66,44 +66,20 @@ describe "Cartographic coordinates" do
66
66
  smods_rec.from_str(with_coords)
67
67
  expect(smods_rec.point_bbox).to eq(["-16.0 -15.0 28.0 13.0"])
68
68
  end
69
+ end
69
70
 
70
- {
71
- %((W 123°23ʹ16ʺ--W 122°31ʹ22ʺ/N 39°23ʹ57ʺ--N 38°17ʹ53ʺ)) =>
72
- ['-123.38777777777779 38.29805555555556 -122.52277777777778 39.399166666666666'],
73
- %(E 10°03'00"--E 12°58'00"/N 45°00'00"--N 41°46'00") =>
74
- ['10.05 41.766666666666666 12.966666666666667 45.0'],
75
- %(E 8°41'-E 12°21'/N 46°04'-N 44°23') =>
76
- ['8.683333333333334 44.38333333333333 12.35 46.06666666666667'],
77
- %((E17°--E11°/N14°--N18°).) =>
78
- ['11.0 14.0 17.0 18.0'], # coordinates need to be reordered
79
- %((W 170⁰--E 55⁰/N 40⁰--S 36⁰).) =>
80
- ['-170.0 -36.0 55.0 40.0'], # superscript 0 is almost a degree character..
81
- %(W80°--E100°/N487°--S42°) =>
82
- [], # N487 is out of bounds for the bounding box
83
- %((W 0°-W 0°/S 90°---S 90°)) =>
84
- ['-0.0 -90.0 -0.0 -90.0'], # one dash, two dashes, three dashes.. what's the difference?
85
- %(W 650--W 100/N 700--N 550) =>
86
- [] # missing degree character, and all coordinates are out of bounds.
87
- }.each do |value, expected|
88
- describe 'data mappings' do
89
- let(:mods) do
90
- <<-EOF
91
- <mods xmlns="#{Mods::MODS_NS}">
92
- <subject>
93
- <cartographics>
94
- <coordinates>#{value}</coordinates>
95
- </cartographics>
96
- </subject>
97
- </mods>
98
- EOF
99
- end
100
-
101
- let(:smods_rec) { Stanford::Mods::Record.new.from_str(mods) }
102
-
103
- it 'maps to the right bounding box' do
104
- expect(smods_rec.point_bbox).to eq expected
105
- end
106
- end
71
+ describe "#coordinates_as_envelope" do
72
+ it "returns empty array if no coordinates in the mods" do
73
+ smods_rec.from_str(no_coord)
74
+ expect(smods_rec.coordinates_as_envelope).to eq([])
75
+ end
76
+ it "returns empty array if bad data is in the mods" do
77
+ smods_rec.from_str(with_bad_data)
78
+ expect(smods_rec.coordinates_as_envelope).to eq([])
79
+ end
80
+ it "returns decimal representation of latitude and longitude" do
81
+ smods_rec.from_str(with_coords)
82
+ expect(smods_rec.coordinates_as_envelope).to eq(["ENVELOPE(-16.0, 28.0, -15.0, 13.0)"])
107
83
  end
108
84
  end
109
85
  end # describe Cartographic coordinates
@@ -0,0 +1,84 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Stanford::Mods::Coordinate do
5
+ describe '#valid' do
6
+ it 'is valid for well-formed coordinates' do
7
+ expect(described_class.new('W 123°23ʹ16ʺ--W 122°31ʹ22ʺ/N 39°23ʹ57ʺ--N 38°17ʹ53ʺ')).to be_valid
8
+ end
9
+
10
+ it 'rejects out-of-bounds coordinates' do
11
+ expect(described_class.new('W80°--E100°/N487°--S42°')).not_to be_valid
12
+ end
13
+
14
+ it 'rejects coordinates without degree symbols' do
15
+ expect(described_class.new('W 650--W 100/N 700--N 550')).not_to be_valid
16
+ end
17
+
18
+ it 'rejects malformed coordinates' do
19
+ expect(described_class.new('(E29°--E35/°S12°--S16°).')).not_to be_valid
20
+ end
21
+ end
22
+
23
+ describe '#as_bbox' do
24
+ it 'is nil for invalid data' do
25
+ expect(described_class.new('x').as_bbox).to eq nil
26
+ end
27
+ end
28
+
29
+ describe '#as_envelope' do
30
+ it 'is nil for invalid data' do
31
+ expect(described_class.new('x').as_envelope).to eq nil
32
+ end
33
+ end
34
+
35
+ context '#as_bbox' do
36
+ {
37
+ %((W 123°23ʹ16ʺ--W 122°31ʹ22ʺ/N 39°23ʹ57ʺ--N 38°17ʹ53ʺ)) =>
38
+ '-123.38777777777779 38.29805555555556 -122.52277777777778 39.399166666666666',
39
+ %(E 10°03'00"--E 12°58'00"/N 45°00'00"--N 41°46'00") =>
40
+ '10.05 41.766666666666666 12.966666666666667 45.0',
41
+ %(E 8°41'-E 12°21'/N 46°04'-N 44°23') =>
42
+ '8.683333333333334 44.38333333333333 12.35 46.06666666666667',
43
+ %((E17°--E11°/N14°--N18°).) =>
44
+ '11.0 14.0 17.0 18.0', # coordinates need to be reordered
45
+ %((W 170⁰--E 55⁰/N 40⁰--S 36⁰).) =>
46
+ '-170.0 -36.0 55.0 40.0', # superscript 0 is almost a degree character..
47
+ %((W 0°-W 0°/S 90°---S 90°)) =>
48
+ '-0.0 -90.0 -0.0 -90.0' # one dash, two dashes, three dashes.. what's the difference?
49
+ }.each do |value, expected|
50
+ describe 'parsing' do
51
+ let(:subject) { described_class.new(value) }
52
+
53
+ it 'transforms into the right bbox' do
54
+ expect(subject.as_bbox).to eq expected
55
+ end
56
+ end
57
+ end
58
+ end
59
+
60
+ context '#as_envelope' do
61
+ {
62
+ %((W 123°23ʹ16ʺ--W 122°31ʹ22ʺ/N 39°23ʹ57ʺ--N 38°17ʹ53ʺ)) =>
63
+ 'ENVELOPE(-123.38777777777779, -122.52277777777778, 38.29805555555556, 39.399166666666666)',
64
+ %(E 10°03'00"--E 12°58'00"/N 45°00'00"--N 41°46'00") =>
65
+ 'ENVELOPE(10.05, 12.966666666666667, 41.766666666666666, 45.0)',
66
+ %(E 8°41'-E 12°21'/N 46°04'-N 44°23') =>
67
+ 'ENVELOPE(8.683333333333334, 12.35, 44.38333333333333, 46.06666666666667)',
68
+ %((E17°--E11°/N14°--N18°).) =>
69
+ 'ENVELOPE(11.0, 17.0, 14.0, 18.0)', # coordinates need to be reordered
70
+ %((W 170⁰--E 55⁰/N 40⁰--S 36⁰).) =>
71
+ 'ENVELOPE(-170.0, 55.0, -36.0, 40.0)', # superscript 0 is almost a degree character..
72
+ %((W 0°-W 0°/S 90°---S 90°)) =>
73
+ 'ENVELOPE(-0.0, -0.0, -90.0, -90.0)' # one dash, two dashes, three dashes.. what's the difference?
74
+ }.each do |value, expected|
75
+ describe 'parsing' do
76
+ let(:subject) { described_class.new(value) }
77
+
78
+ it 'transforms into the right envelope' do
79
+ expect(subject.as_envelope).to eq expected
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stanford-mods
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-01-13 00:00:00.000000000 Z
12
+ date: 2016-01-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mods
@@ -120,6 +120,7 @@ files:
120
120
  - Rakefile
121
121
  - config/mappings_hash.rb
122
122
  - lib/stanford-mods.rb
123
+ - lib/stanford-mods/coordinate.rb
123
124
  - lib/stanford-mods/date_parsing.rb
124
125
  - lib/stanford-mods/geo_spatial.rb
125
126
  - lib/stanford-mods/name.rb
@@ -132,6 +133,7 @@ files:
132
133
  - spec/date_parsing_spec.rb
133
134
  - spec/fixtures/spotlight_pub_date_data.rb
134
135
  - spec/geo_spatial_spec.rb
136
+ - spec/lib/stanford-mods/coordinate_spec.rb
135
137
  - spec/name_spec.rb
136
138
  - spec/origin_info_spec.rb
137
139
  - spec/physical_location_spec.rb
@@ -171,6 +173,7 @@ test_files:
171
173
  - spec/date_parsing_spec.rb
172
174
  - spec/fixtures/spotlight_pub_date_data.rb
173
175
  - spec/geo_spatial_spec.rb
176
+ - spec/lib/stanford-mods/coordinate_spec.rb
174
177
  - spec/name_spec.rb
175
178
  - spec/origin_info_spec.rb
176
179
  - spec/physical_location_spec.rb