stanford-mods 1.4.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/stanford-mods.rb +1 -0
- data/lib/stanford-mods/coordinate.rb +81 -0
- data/lib/stanford-mods/geo_spatial.rb +9 -36
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/geo_spatial_spec.rb +14 -38
- data/spec/lib/stanford-mods/coordinate_spec.rb +84 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fda4f5ab13e13c81db05b27fd3ca5e5e17be0e1d
|
4
|
+
data.tar.gz: 13a2ee552756a2e700ee102ada1b56b55d5cda82
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63c3efbf88ea9d76b80182c490cb01c3d6d404858ce7d91f46b1d98c3df284cf8dad19cf6b55eb2185a1779bafd8c750dde4e04cf39e64f6c810cf030cc80359
|
7
|
+
data.tar.gz: 275a160cf3603693e5801f0d5ff530995463c00a5f77625391fc09efaca8b13f0621f5a2e5034de5e1ae9cbb27d7a59fa0dcaf53aac877d057e65e004ecc3627
|
data/lib/stanford-mods.rb
CHANGED
data/lib/stanford-mods/coordinate.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Stanford
|
3
|
+
module Mods
|
4
|
+
##
|
5
|
+
# Geospatial coordinate parsing
|
6
|
+
class Coordinate
|
7
|
+
attr_reader :value
|
8
|
+
|
9
|
+
def initialize(value)
|
10
|
+
@value = value
|
11
|
+
end
|
12
|
+
|
13
|
+
# Convert the coordinate to a WKT/CQL ENVELOPE representation
|
14
|
+
def as_envelope
|
15
|
+
return unless valid?
|
16
|
+
|
17
|
+
"ENVELOPE(#{bounds[:min_x]}, #{bounds[:max_x]}, #{bounds[:min_y]}, #{bounds[:max_y]})"
|
18
|
+
end
|
19
|
+
|
20
|
+
# Convert the coordinate to a Solr 4.x bbox-format representation
|
21
|
+
def as_bbox
|
22
|
+
return unless valid?
|
23
|
+
|
24
|
+
"#{bounds[:min_x]} #{bounds[:min_y]} #{bounds[:max_x]} #{bounds[:max_y]}"
|
25
|
+
end
|
26
|
+
|
27
|
+
def valid?
|
28
|
+
return false if bounds.empty?
|
29
|
+
|
30
|
+
range_x = -180.0..180.0
|
31
|
+
range_y = -90.0..90.0
|
32
|
+
|
33
|
+
range_x.include?(bounds[:min_x]) &&
|
34
|
+
range_x.include?(bounds[:max_x]) &&
|
35
|
+
range_y.include?(bounds[:min_y]) &&
|
36
|
+
range_y.include?(bounds[:max_y])
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
def bounds
|
42
|
+
@bounds ||= begin
|
43
|
+
matches = coord.match %r{\A(?<lat>[EW].+-+.+)\s*/\s*(?<lng>[NS].+-+.+)\Z}
|
44
|
+
|
45
|
+
if matches
|
46
|
+
min_x, max_x = matches['lat'].split(/-+/).map { |x| coord_to_decimal(x) }.minmax
|
47
|
+
min_y, max_y = matches['lng'].split(/-+/).map { |y| coord_to_decimal(y) }.minmax
|
48
|
+
|
49
|
+
{ min_x: min_x, min_y: min_y, max_x: max_x, max_y: max_y }
|
50
|
+
else
|
51
|
+
{}
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def coord
|
57
|
+
matches = value.match(/^\(?([^)]+)\)?\.?$/)
|
58
|
+
|
59
|
+
if matches
|
60
|
+
matches[1]
|
61
|
+
else
|
62
|
+
value
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def coord_to_decimal(point)
|
67
|
+
regex = /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/
|
68
|
+
match = regex.match(point)
|
69
|
+
|
70
|
+
return Float::INFINITY unless match
|
71
|
+
|
72
|
+
dec = match['deg'].to_i
|
73
|
+
dec += match['min'].to_f / 60
|
74
|
+
dec += match['sec'].to_f / 60 / 60
|
75
|
+
dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
|
76
|
+
|
77
|
+
dec
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/stanford-mods/geo_spatial.rb
CHANGED
@@ -10,50 +10,23 @@ module Stanford
|
|
10
10
|
Array(@mods_ng_xml.subject.cartographics.coordinates).map(&:text)
|
11
11
|
end
|
12
12
|
|
13
|
-
def
|
13
|
+
def coordinates_as_envelope
|
14
14
|
coordinates.map do |n|
|
15
|
-
|
15
|
+
c = Stanford::Mods::Coordinate.new(n)
|
16
16
|
|
17
|
-
if
|
18
|
-
coord_to_bbox(matches[1])
|
19
|
-
else
|
20
|
-
coord_to_bbox(n)
|
21
|
-
end
|
17
|
+
c.as_envelope if c.valid?
|
22
18
|
end.compact
|
23
19
|
end
|
24
20
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
matches = coord.match %r{\A(?<lat>[EW].+-+.+)\s*/\s*(?<lng>[NS].+-+.+)\Z}
|
29
|
-
return unless matches
|
30
|
-
|
31
|
-
min_x, max_x = matches['lat'].split(/-+/).map { |x| coord_to_decimal(x) }.minmax
|
32
|
-
min_y, max_y = matches['lng'].split(/-+/).map { |y| coord_to_decimal(y) }.minmax
|
33
|
-
|
34
|
-
"#{min_x} #{min_y} #{max_x} #{max_y}" if valid_bbox?(min_x, max_x, min_y, max_y)
|
35
|
-
end
|
36
|
-
|
37
|
-
def coord_to_decimal(point)
|
38
|
-
regex = /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/
|
39
|
-
match = regex.match(point)
|
40
|
-
|
41
|
-
return Float::INFINITY unless match
|
42
|
-
|
43
|
-
dec = match['deg'].to_i
|
44
|
-
dec += match['min'].to_f / 60
|
45
|
-
dec += match['sec'].to_f / 60 / 60
|
46
|
-
dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
|
21
|
+
def coordinates_as_bbox
|
22
|
+
coordinates.map do |n|
|
23
|
+
c = Stanford::Mods::Coordinate.new(n)
|
47
24
|
|
48
|
-
|
25
|
+
c.as_bbox if c.valid?
|
26
|
+
end.compact
|
49
27
|
end
|
50
28
|
|
51
|
-
|
52
|
-
range_x = -180.0..180.0
|
53
|
-
range_y = -90.0..90.0
|
54
|
-
|
55
|
-
range_x.include?(min_x) && range_x.include?(max_x) && range_y.include?(min_y) && range_y.include?(max_y)
|
56
|
-
end
|
29
|
+
alias point_bbox coordinates_as_bbox
|
57
30
|
end # class Record
|
58
31
|
end # Module Mods
|
59
32
|
end # Module Stanford
|
data/spec/geo_spatial_spec.rb
CHANGED
@@ -53,7 +53,7 @@ describe "Cartographic coordinates" do
|
|
53
53
|
end
|
54
54
|
end
|
55
55
|
|
56
|
-
|
56
|
+
describe "#coordinates_as_bbox" do
|
57
57
|
it "returns empty array if no coordinates in the mods" do
|
58
58
|
smods_rec.from_str(no_coord)
|
59
59
|
expect(smods_rec.point_bbox).to eq([])
|
@@ -66,44 +66,20 @@ describe "Cartographic coordinates" do
|
|
66
66
|
smods_rec.from_str(with_coords)
|
67
67
|
expect(smods_rec.point_bbox).to eq(["-16.0 -15.0 28.0 13.0"])
|
68
68
|
end
|
69
|
+
end
|
69
70
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
[], # N487 is out of bounds for the bounding box
|
83
|
-
%((W 0°-W 0°/S 90°---S 90°)) =>
|
84
|
-
['-0.0 -90.0 -0.0 -90.0'], # one dash, two dashes, three dashes.. what's the difference?
|
85
|
-
%(W 650--W 100/N 700--N 550) =>
|
86
|
-
[] # missing degree character, and all coordinates are out of bounds.
|
87
|
-
}.each do |value, expected|
|
88
|
-
describe 'data mappings' do
|
89
|
-
let(:mods) do
|
90
|
-
<<-EOF
|
91
|
-
<mods xmlns="#{Mods::MODS_NS}">
|
92
|
-
<subject>
|
93
|
-
<cartographics>
|
94
|
-
<coordinates>#{value}</coordinates>
|
95
|
-
</cartographics>
|
96
|
-
</subject>
|
97
|
-
</mods>
|
98
|
-
EOF
|
99
|
-
end
|
100
|
-
|
101
|
-
let(:smods_rec) { Stanford::Mods::Record.new.from_str(mods) }
|
102
|
-
|
103
|
-
it 'maps to the right bounding box' do
|
104
|
-
expect(smods_rec.point_bbox).to eq expected
|
105
|
-
end
|
106
|
-
end
|
71
|
+
describe "#coordinates_as_envelope" do
|
72
|
+
it "returns empty array if no coordinates in the mods" do
|
73
|
+
smods_rec.from_str(no_coord)
|
74
|
+
expect(smods_rec.coordinates_as_envelope).to eq([])
|
75
|
+
end
|
76
|
+
it "returns empty array if bad data is in the mods" do
|
77
|
+
smods_rec.from_str(with_bad_data)
|
78
|
+
expect(smods_rec.coordinates_as_envelope).to eq([])
|
79
|
+
end
|
80
|
+
it "returns decimal representation of latitude and longitude" do
|
81
|
+
smods_rec.from_str(with_coords)
|
82
|
+
expect(smods_rec.coordinates_as_envelope).to eq(["ENVELOPE(-16.0, 28.0, -15.0, 13.0)"])
|
107
83
|
end
|
108
84
|
end
|
109
85
|
end # describe Cartographic coordinates
|
data/spec/lib/stanford-mods/coordinate_spec.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Stanford::Mods::Coordinate do
|
5
|
+
describe '#valid' do
|
6
|
+
it 'is valid for well-formed coordinates' do
|
7
|
+
expect(described_class.new('W 123°23ʹ16ʺ--W 122°31ʹ22ʺ/N 39°23ʹ57ʺ--N 38°17ʹ53ʺ')).to be_valid
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'rejects out-of-bounds coordinates' do
|
11
|
+
expect(described_class.new('W80°--E100°/N487°--S42°')).not_to be_valid
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'rejects coordinates without degree symbols' do
|
15
|
+
expect(described_class.new('W 650--W 100/N 700--N 550')).not_to be_valid
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'rejects malformed coordinates' do
|
19
|
+
expect(described_class.new('(E29°--E35/°S12°--S16°).')).not_to be_valid
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
describe '#as_bbox' do
|
24
|
+
it 'is nil for invalid data' do
|
25
|
+
expect(described_class.new('x').as_bbox).to eq nil
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe '#as_envelope' do
|
30
|
+
it 'is nil for invalid data' do
|
31
|
+
expect(described_class.new('x').as_envelope).to eq nil
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
context '#as_bbox' do
|
36
|
+
{
|
37
|
+
%((W 123°23ʹ16ʺ--W 122°31ʹ22ʺ/N 39°23ʹ57ʺ--N 38°17ʹ53ʺ)) =>
|
38
|
+
'-123.38777777777779 38.29805555555556 -122.52277777777778 39.399166666666666',
|
39
|
+
%(E 10°03'00"--E 12°58'00"/N 45°00'00"--N 41°46'00") =>
|
40
|
+
'10.05 41.766666666666666 12.966666666666667 45.0',
|
41
|
+
%(E 8°41'-E 12°21'/N 46°04'-N 44°23') =>
|
42
|
+
'8.683333333333334 44.38333333333333 12.35 46.06666666666667',
|
43
|
+
%((E17°--E11°/N14°--N18°).) =>
|
44
|
+
'11.0 14.0 17.0 18.0', # coordinates need to be reordered
|
45
|
+
%((W 170⁰--E 55⁰/N 40⁰--S 36⁰).) =>
|
46
|
+
'-170.0 -36.0 55.0 40.0', # superscript 0 is almost a degree character..
|
47
|
+
%((W 0°-W 0°/S 90°---S 90°)) =>
|
48
|
+
'-0.0 -90.0 -0.0 -90.0' # one dash, two dashes, three dashes.. what's the difference?
|
49
|
+
}.each do |value, expected|
|
50
|
+
describe 'parsing' do
|
51
|
+
let(:subject) { described_class.new(value) }
|
52
|
+
|
53
|
+
it 'transforms into the right bbox' do
|
54
|
+
expect(subject.as_bbox).to eq expected
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
context '#as_envelope' do
|
61
|
+
{
|
62
|
+
%((W 123°23ʹ16ʺ--W 122°31ʹ22ʺ/N 39°23ʹ57ʺ--N 38°17ʹ53ʺ)) =>
|
63
|
+
'ENVELOPE(-123.38777777777779, -122.52277777777778, 38.29805555555556, 39.399166666666666)',
|
64
|
+
%(E 10°03'00"--E 12°58'00"/N 45°00'00"--N 41°46'00") =>
|
65
|
+
'ENVELOPE(10.05, 12.966666666666667, 41.766666666666666, 45.0)',
|
66
|
+
%(E 8°41'-E 12°21'/N 46°04'-N 44°23') =>
|
67
|
+
'ENVELOPE(8.683333333333334, 12.35, 44.38333333333333, 46.06666666666667)',
|
68
|
+
%((E17°--E11°/N14°--N18°).) =>
|
69
|
+
'ENVELOPE(11.0, 17.0, 14.0, 18.0)', # coordinates need to be reordered
|
70
|
+
%((W 170⁰--E 55⁰/N 40⁰--S 36⁰).) =>
|
71
|
+
'ENVELOPE(-170.0, 55.0, -36.0, 40.0)', # superscript 0 is almost a degree character..
|
72
|
+
%((W 0°-W 0°/S 90°---S 90°)) =>
|
73
|
+
'ENVELOPE(-0.0, -0.0, -90.0, -90.0)' # one dash, two dashes, three dashes.. what's the difference?
|
74
|
+
}.each do |value, expected|
|
75
|
+
describe 'parsing' do
|
76
|
+
let(:subject) { described_class.new(value) }
|
77
|
+
|
78
|
+
it 'transforms into the right envelope' do
|
79
|
+
expect(subject.as_envelope).to eq expected
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stanford-mods
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-01-
|
12
|
+
date: 2016-01-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mods
|
@@ -120,6 +120,7 @@ files:
|
|
120
120
|
- Rakefile
|
121
121
|
- config/mappings_hash.rb
|
122
122
|
- lib/stanford-mods.rb
|
123
|
+
- lib/stanford-mods/coordinate.rb
|
123
124
|
- lib/stanford-mods/date_parsing.rb
|
124
125
|
- lib/stanford-mods/geo_spatial.rb
|
125
126
|
- lib/stanford-mods/name.rb
|
@@ -132,6 +133,7 @@ files:
|
|
132
133
|
- spec/date_parsing_spec.rb
|
133
134
|
- spec/fixtures/spotlight_pub_date_data.rb
|
134
135
|
- spec/geo_spatial_spec.rb
|
136
|
+
- spec/lib/stanford-mods/coordinate_spec.rb
|
135
137
|
- spec/name_spec.rb
|
136
138
|
- spec/origin_info_spec.rb
|
137
139
|
- spec/physical_location_spec.rb
|
@@ -171,6 +173,7 @@ test_files:
|
|
171
173
|
- spec/date_parsing_spec.rb
|
172
174
|
- spec/fixtures/spotlight_pub_date_data.rb
|
173
175
|
- spec/geo_spatial_spec.rb
|
176
|
+
- spec/lib/stanford-mods/coordinate_spec.rb
|
174
177
|
- spec/name_spec.rb
|
175
178
|
- spec/origin_info_spec.rb
|
176
179
|
- spec/physical_location_spec.rb
|