aipp 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/.travis.yml +1 -2
- data/CHANGELOG.md +15 -0
- data/README.md +122 -37
- data/TODO.md +4 -0
- data/aipp.gemspec +8 -3
- data/lib/aipp.rb +14 -2
- data/lib/aipp/aip.rb +44 -29
- data/lib/aipp/downloader.rb +115 -0
- data/lib/aipp/executable.rb +6 -6
- data/lib/aipp/parser.rb +23 -23
- data/lib/aipp/patcher.rb +47 -0
- data/lib/aipp/pdf.rb +123 -0
- data/lib/aipp/regions/LF/AD-1.3.rb +162 -0
- data/lib/aipp/regions/LF/AD-1.3.yml +511 -0
- data/lib/aipp/regions/LF/AD-1.6.rb +31 -0
- data/lib/aipp/regions/LF/AD-2.rb +316 -0
- data/lib/aipp/regions/LF/AD-2.yml +185 -0
- data/lib/aipp/regions/LF/AD-3.1.rb-NEW +11 -0
- data/lib/aipp/regions/LF/ENR-2.1.rb +25 -24
- data/lib/aipp/regions/LF/ENR-4.1.rb +24 -23
- data/lib/aipp/regions/LF/ENR-4.3.rb +8 -6
- data/lib/aipp/regions/LF/ENR-5.1.rb +32 -22
- data/lib/aipp/regions/LF/ENR-5.5.rb-NEW +11 -0
- data/lib/aipp/regions/LF/helpers/AD_radio.rb +90 -0
- data/lib/aipp/regions/LF/helpers/URL.rb +26 -0
- data/lib/aipp/regions/LF/helpers/common.rb +186 -0
- data/lib/aipp/version.rb +1 -1
- data/lib/core_ext/enumerable.rb +52 -0
- data/lib/core_ext/nil_class.rb +10 -0
- data/lib/core_ext/object.rb +42 -0
- data/lib/core_ext/string.rb +105 -0
- data/spec/fixtures/archive.zip +0 -0
- data/spec/fixtures/document.pdf +0 -0
- data/spec/fixtures/document.pdf.json +1 -0
- data/spec/fixtures/new.html +6 -0
- data/spec/fixtures/new.pdf +0 -0
- data/spec/fixtures/new.txt +1 -0
- data/spec/lib/aipp/downloader_spec.rb +81 -0
- data/spec/lib/aipp/patcher_spec.rb +46 -0
- data/spec/lib/aipp/pdf_spec.rb +124 -0
- data/spec/lib/core_ext/enumberable_spec.rb +76 -0
- data/spec/lib/core_ext/nil_class_spec.rb +11 -0
- data/spec/lib/core_ext/string_spec.rb +88 -0
- data/spec/spec_helper.rb +1 -0
- metadata +123 -23
- data/lib/aipp/progress.rb +0 -40
- data/lib/aipp/refinements.rb +0 -114
- data/lib/aipp/regions/LF/helper.rb +0 -177
- data/spec/lib/aipp/refinements_spec.rb +0 -123
module AIPP
  module LF
    module Helpers
      module Common

        using AIXM::Refinements

        # Map border names to OFMX
        BORDERS = {
          'franco-allemande' => 'FRANCE_GERMANY',
          'franco-espagnole' => 'FRANCE_SPAIN',
          'franco-italienne' => 'FRANCE_ITALY',
          'franco-suisse' => 'FRANCE_SWITZERLAND',
          'franco-luxembourgeoise' => 'FRANCE_LUXEMBOURG',
          'franco-belge' => 'BELGIUM_FRANCE',
          'germano-suisse' => 'GERMANY_SWITZERLAND',
          'hispano-andorrane' => 'ANDORRA_SPAIN',
          'la côte atlantique française' => 'FRANCE_ATLANTIC_COAST',   # TODO: handle internally
          'côte méditérrannéenne' => 'FRANCE_MEDITERRANEAN_COAST',   # TODO: handle internally
          'limite des eaux territoriales atlantique françaises' => 'FRANCE_ATLANTIC_TERRITORIAL_SEA',   # TODO: handle internally
          'parc national des écrins' => 'FRANCE_ECRINS_NATIONAL_PARK'   # TODO: handle internally
        }.freeze

        # Intersection points between three countries
        #
        # Keys are "BORDER_A|BORDER_B" pairs in traversal order; values are the
        # coordinates where one border hands over to the next.
        INTERSECTIONS = {
          'FRANCE_SPAIN|ANDORRA_SPAIN' => AIXM.xy(lat: 42.502720, long: 1.725965),
          'ANDORRA_SPAIN|FRANCE_SPAIN' => AIXM.xy(lat: 42.603571, long: 1.442681),
          'FRANCE_SWITZERLAND|FRANCE_ITALY' => AIXM.xy(lat: 45.922701, long: 7.044125),
          'BELGIUM_FRANCE|FRANCE_LUXEMBOURG' => AIXM.xy(lat: 49.546428, long: 5.818415),
          'FRANCE_LUXEMBOURG|FRANCE_GERMANY' => AIXM.xy(lat: 49.469438, long: 6.367516),
          'FRANCE_GERMANY|FRANCE_SWITZERLAND' => AIXM.xy(lat: 47.589831, long: 7.589049),
          'GERMANY_SWITZERLAND|FRANCE_GERMANY' => AIXM.xy(lat: 47.589831, long: 7.589049)
        }.freeze   # frozen for consistency with BORDERS and ANGLICISE_MAP

        # Map surface compositions to OFMX composition and preparation
        COMPOSITIONS = {
          'revêtue' => { preparation: :paved },
          'non revêtue' => { preparation: :natural },
          'macadam' => { composition: :macadam },
          'béton' => { composition: :concrete, preparation: :paved },
          'béton bitumineux' => { composition: :bitumen, preparation: :paved },
          'enrobé bitumineux' => { composition: :bitumen },
          'asphalte' => { composition: :asphalt, preparation: :paved },
          'gazon' => { composition: :grass }
        }.freeze   # frozen for consistency with BORDERS and ANGLICISE_MAP

        # Transform French text fragments to English
        #
        # Applied in order by {#anglicise}, so earlier rules (e.g. character
        # stripping) feed into later ones (e.g. whitespace collapsing).
        ANGLICISE_MAP = {
          /[^A-Z0-9 .\-]/ => '',
          /0(\d)/ => '\1',
          /(\d)-(\d)/ => '\1.\2',
          /PARTIE/ => '',
          /DELEG\./ => 'DELEG ',
          /FRANCAISE?/ => 'FR',
          /ANGLAISE?/ => 'UK',
          /BELGE/ => 'BE',
          /LUXEMBOURGEOISE?/ => 'LU',
          /ALLEMANDE?/ => 'DE',
          /SUISSE/ => 'CH',
          /ITALIEN(?:NE)?/ => 'IT',
          /ESPAGNOLE?/ => 'ES',
          /ANDORRANE?/ => 'AD',
          /NORD/ => 'N',
          /EST/ => 'E',
          /SUD/ => 'S',
          /OEST/ => 'W',
          /ANGLO NORMANDES/ => 'ANGLO-NORMANDES',
          / +/ => ' '
        }.freeze

        # Templates

        # Memoized AIXM organisation for France (id +LF+).
        #
        # @return [AIXM::Feature::Organisation]
        def organisation_lf
          @organisation_lf ||= AIXM.organisation(
            name: 'FRANCE',
            type: 'S'
          ).tap do |organisation|
            organisation.id = 'LF'
          end
        end

        # Transformations

        # Strip elements marked as deleted from the scraped AIP HTML.
        #
        # @param html [Nokogiri::HTML5::Document] scraped AIP page
        # @return [Nokogiri::HTML5::Document] same document, mutated in place
        def prepare(html:)
          html.tap do |node|
            node.css('del, tr[class*="AmdtDeletedAIRAC"]').each(&:remove)   # remove deleted entries
          end
        end

        # Translate a French feature name to its anglicised OFMX form by
        # applying ANGLICISE_MAP in order.
        #
        # @param name [String, nil] French name (nil is passed through)
        # @return [String, nil] anglicised name
        def anglicise(name:)
          name&.uptrans&.tap do |string|
            ANGLICISE_MAP.each do |regexp, replacement|
              string.gsub!(regexp, replacement)
            end
          end
        end

        # Parsers

        # Build a pipe-separated source reference for a feature.
        #
        # @param position [Integer] position (e.g. line) within the AIP file
        # @param aip_file [String, nil] AIP file name (defaults to @aip)
        # @return [String] e.g. "LF|ENR|ENR-5.1|2019-01-03|123"
        def source(position:, aip_file: nil)
          aip_file ||= @aip
          [
            options[:region],
            aip_file.split('-').first,   # AIP section such as "ENR"
            aip_file,
            options[:airac].date.xmlschema,
            position
          ].join('|')
        end

        # Parse a whitespace-separated "lat long" pair.
        #
        # @param text [String] raw coordinates
        # @return [AIXM::XY]
        def xy_from(text)
          parts = text.strip.split(/\s+/)
          AIXM.xy(lat: parts[0], long: parts[1])
        end

        # Parse a vertical limit such as "SFC", "UNL", "2000ftAMSL" or "FL115".
        #
        # @param limit [String, nil] raw limit
        # @return [AIXM::Z, nil]
        # @raise [RuntimeError] if the limit is not recognized
        def z_from(limit)
          case limit
          when nil then nil
          when 'SFC' then AIXM::GROUND
          when 'UNL' then AIXM::UNLIMITED
          when /(\d+)ftASFC/ then AIXM.z($1.to_i, :qfe)
          when /(\d+)ftAMSL/ then AIXM.z($1.to_i, :qnh)
          when /FL(\d+)/ then AIXM.z($1.to_i, :qne)
          else fail "z `#{limit}' not recognized"
          end
        end

        # Parse an airspace layer from the stacked limits notation where upper
        # limits sit above a "---" separator and lower limits below it.
        #
        # @param text_for_limits [String] raw vertical limits
        # @param text_for_class [String, nil] airspace class
        # @return [AIXM::Component::Layer]
        def layer_from(text_for_limits, text_for_class=nil)
          above, below = text_for_limits.gsub(/ /, '').split(/\n+/).select(&:blank_to_nil).split { |e| e.match? '---+' }
          above.reverse!   # closest-to-separator line first on both sides
          AIXM.layer(
            class: text_for_class,
            vertical_limits: AIXM.vertical_limits(
              max_z: z_from(above[1]),
              upper_z: z_from(above[0]),
              lower_z: z_from(below[0]),
              min_z: z_from(below[1])
            )
          )
        end

        # Parse a geometry description (points, arcs, circles and borders in
        # French prose) into an AIXM geometry.
        #
        # @param text [String] raw geometry description, elements joined by " - "
        # @return [AIXM::Component::Geometry]
        # @raise [RuntimeError] if an element is not recognized
        def geometry_from(text)
          AIXM.geometry.tap do |geometry|
            buffer = {}
            # A trailing sentinel "end" flushes any buffered point.
            text.gsub(/\s+/, ' ').strip.split(/ - /).append('end').each do |element|
              case element
              when /arc (anti-)?horaire .+ sur (\S+) , (\S+)/i
                geometry << AIXM.arc(
                  xy: buffer.delete(:xy),
                  center_xy: AIXM.xy(lat: $2, long: $3),
                  clockwise: $1.nil?   # "anti-horaire" means counter-clockwise
                )
              when /cercle de ([\d\.]+) (NM|km|m) .+ sur (\S+) , (\S+)/i
                geometry << AIXM.circle(
                  center_xy: AIXM.xy(lat: $3, long: $4),
                  radius: AIXM.d($1.to_f, $2)
                )
              when /end|(\S+) , (\S+)/
                # Flush the previously buffered point, then buffer this one.
                geometry << AIXM.point(xy: buffer[:xy]) if buffer.has_key?(:xy)
                buffer[:xy] = AIXM.xy(lat: $1, long: $2) if $1
              when /^frontière ([\w-]+)/i, /^(\D[^(]+)/i
                border_name = BORDERS.fetch($1.downcase.strip)
                # Without an explicit start point, derive it from the
                # intersection of the previous and current borders.
                buffer[:xy] ||= INTERSECTIONS.fetch("#{buffer[:border_name]}|#{border_name}")
                buffer[:border_name] = border_name
                if border_name == 'FRANCE_SPAIN'   # specify which part of this split border
                  border_name += buffer[:xy].lat < 42.55 ? '_EAST' : '_WEST'
                end
                geometry << AIXM.border(
                  xy: buffer.delete(:xy),
                  name: border_name
                )
              else
                fail "geometry `#{element}' not recognized"
              end
            end
          end
        end

        # Parse a timetable: only round-the-clock ("H24") is recognized.
        #
        # @param text [String] raw timetable
        # @return [AIXM::Component::Timetable, nil]
        def timetable_from(text)
          AIXM::H24 if text.gsub(/\W/, '') == 'H24'
        end

      end
    end
  end
end
|
data/lib/aipp/version.rb
CHANGED
module Enumerable

  # @!method split(object=nil, &block)
  #   Divides an enumerable into sub-enumerables based on a delimiter,
  #   returning an array of these sub-enumerables.
  #
  #   @example
  #     [1, 2, 0, 3, 4].split { |e| e == 0 }   # => [[1, 2], [3, 4]]
  #     [1, 2, 0, 3, 4].split(0)               # => [[1, 2], [3, 4]]
  #     [0, 0, 1, 0, 2].split(0)               # => [[], [], [1], [2]]
  #     [1, 0, 0, 2, 3].split(0)               # => [[1], [], [2, 3]]
  #     [1, 0, 2, 0, 0].split(0)               # => [[1], [2]]
  #
  #   @note While similar to +Array#split+ from ActiveSupport, this core
  #     extension works for all enumerables and therefore works fine with
  #     Nokogiri. Also, it behaves more like +String#split+ by ignoring any
  #     trailing zero-length sub-enumerables.
  #
  #   @param object [Object] element at which to split
  #   @yield [Object] element to analyze
  #   @yieldreturn [Boolean] whether to split at this element or not
  #   @return [Array]
  def split(*args, &block)
    [].tap do |array|
      start = 0
      # Advance past one delimiter per iteration, collecting the slice
      # between the previous delimiter and the current one.
      while (index = slice(start...length).find_index(*args, &block))
        array << slice(start...start + index)
        start += index + 1
      end
      array << slice(start..-1) if start < length
      # A delimiter at the very end would otherwise leave zero-length
      # tails behind; drop them to match String#split semantics.
      array.pop while array.last && array.last.length.zero?
    end
  end

  # @!method group_by_chunks(&block)
  #   Build a hash which maps elements matching the chunk condition to
  #   an array of subsequent elements which don't match the chunk condition.
  #
  #   @example
  #     [1, 10, 11, 12, 2, 20, 21, 3, 30, 31, 32].group_by_chunks { |i| i < 10 }
  #     # => { 1 => [10, 11, 12], 2 => [20, 21], 3 => [30, 31, 32] }
  #
  #   @note The first element must match the chunk condition.
  #
  #   @yield [Object] object to analyze
  #   @yieldreturn [Boolean] chunk condition: begin a new chunk with this
  #     object as key if the condition returns true
  #   @raise [ArgumentError] if the first element fails the chunk condition
  #   @return [Hash]
  def group_by_chunks
    fail(ArgumentError, "first element must match chunk condition") unless yield(first)
    # slice_when starts a new chunk before every element matching the
    # condition; each chunk's head becomes the key, its tail the value.
    slice_when { |_, e| yield(e) }.map { |chunk| [chunk.first, chunk[1..]] }.to_h
  end

end
|
class Object

  # Issue a warning and maybe open a Pry session attached to the error or
  # binding passed.
  #
  # Note: this overrides +Kernel#warn+ for all objects. The +pry:+ keyword
  # therefore defaults to +nil+ so that plain +warn("message")+ calls from
  # other code keep working; the original required keyword would raise
  # ArgumentError on every such call.
  #
  # @example with error context
  #   begin
  #     (...)
  #   rescue => error
  #     warn("oops", pry: error)
  #   end
  # @example with binding context
  #   warn("oops", pry: binding)
  # @param message [String] warning message
  # @param pry [Exception, Binding, nil] attach the Pry session to this error
  #   or binding
  def warn(message, pry: nil)
    $WARN_COUNTER = $WARN_COUNTER.to_i + 1
    Kernel.warn "WARNING #{$WARN_COUNTER}: #{message}".red
    # $PRY_ON_WARN is either true (break on every warning) or an Integer
    # (break on that specific warning number only).
    if $PRY_ON_WARN == true || $PRY_ON_WARN == $WARN_COUNTER
      case pry
      when Exception then Pry::rescued(pry)
      when Binding then pry.pry
      end
    end
  end

  # Issue an informational message.
  #
  # @param message [String] informational message
  # @param color [Symbol] color method to apply (presumably from the
  #   colorize gem — confirm against the gemspec)
  def info(message, color: :black)
    puts message.send(color)
  end

  # Issue a debug message (only shown when $DEBUG is set, e.g. via ruby -d).
  #
  # @param message [String] debug message
  # @param color [Symbol] color method to apply
  def debug(message, color: :blue)
    info(message, color: color) if $DEBUG
  end

end
|
class String

  # Convert blank strings to +nil+.
  #
  # @example
  #   "foobar".blank_to_nil   # => "foobar"
  #   " ".blank_to_nil        # => nil
  #   "".blank_to_nil         # => nil
  #   nil.blank_to_nil        # => nil
  #
  # @note Relies on +String#present?+ being defined elsewhere (presumably
  #   ActiveSupport — confirm against the gemspec); the +nil+ receiver case
  #   is covered by the +NilClass+ core extension.
  #
  # @return [String, nil] converted string
  def blank_to_nil
    self if present?
  end

  # Strip and collapse unnecessary whitespace
  #
  # @note While similar to +String#squish+ from ActiveSupport, newlines +\n+
  #   are preserved and not collapsed into one space.
  #
  # @example
  #   " foo\n\nbar \r".compact   # => "foo\nbar"
  #
  # @return [String] compacted string
  def compact
    split("\n").map { |s| s.squish.blank_to_nil }.compact.join("\n")
  end

  # Fix messy oddities such as the use of two apostrophes instead of a quote
  #
  # @example
  #   "the ''Terror'' was a fine ship".cleanup   # => "the \"Terror\" was a fine ship"
  #
  # @return [String] cleaned string
  def cleanup
    gsub(/[#{AIXM::MIN}]{2}|[#{AIXM::SEC}]/, '"').       # unify quotes
      gsub(/[#{AIXM::MIN}]/, "'").                       # unify apostrophes
      gsub(/"[[:blank:]]*(.*?)[[:blank:]]*"/m, '"\1"').  # remove whitespace within quotes
      split(/\r?\n/).map { |s| s.strip.blank_to_nil }.compact.join("\n")   # remove blank lines
  end

  # Add spaces between obviously glued words:
  # * camel glued words
  # * three-or-more-letter and number-only words
  #
  # @example
  #   "thisString has spaceProblems".unglue   # => "this String has space Problems"
  #   "the first123meters of D25".unglue      # => "the first 123 meters of D25"
  #
  # @return [String] unglued string
  def unglue
    self.dup.tap do |string|
      # lower→UPPER camel seam, word→digit seam, digit→word seam
      [/([[:lower:]])([[:upper:]])/, /([[:alpha:]]{3,})(\d)/, /(\d)([[:alpha:]]{3,})/].freeze.each do |regexp|
        string.gsub!(regexp, '\1 \2')
      end
    end
  end

  # Calculate the correlation of two strings by counting mutual words
  #
  # Both strings are normalized as follows:
  # * remove accents, umlauts etc
  # * remove everything but members of the +\w+ class
  # * downcase
  #
  # The normalized strings are split into words. Only words fulfilling either
  # of the following conditions are taken into consideration:
  # * words present in and translated by the +synonyms+ map
  # * words of at least 5 characters length
  # * words consisting of exactly one letter followed by any number of digits
  #   (an optional whitespace between the two is ignored, e.g. "D 25" is the
  #   same as "D25")
  #
  # The +synonyms+ map is an array where terms in even positions map to their
  # synonym in the following (odd) position:
  #
  #   SYNONYMS = ['term1', 'synonym1', 'term2', 'synonym2']
  #
  # @example
  #   subject = "Truck en route on N 3 sud"
  #   subject.correlate("my car is on D25")             # => 0
  #   subject.correlate("my truck is on D25")           # => 1
  #   subject.correlate("my truck is on N3")            # => 2
  #   subject.correlate("south", ['sud', 'south'])      # => 1
  #
  # @param other [String] string to compare with
  # @param synonyms [Array<String>] array of synonym pairs
  # @return [Integer] 0 for unrelated strings and positive integers for related
  #   strings with higher numbers indicating tighter correlation
  def correlate(other, synonyms=[])
    self_words, other_words = [self, other].map do |string|
      string.
        unicode_normalize(:nfd).                  # split accents off their letters
        downcase.gsub(/[-\u2013]/, ' ').          # treat dashes as word separators
        remove(/[^\w\s]/).                        # drop remaining non-word characters
        gsub(/\b(\w)\s?(\d+)\b/, '\1\2').         # "D 25" == "D25"
        compact.
        split(/\W+/).
        map { |w| (i = synonyms.index(w)).nil? ? w : (i.odd? ? w : synonyms[i + 1]).upcase }.
        keep_if { |w| w.match?(/\w{5,}|\w\d+|[[:upper:]]/) }.
        uniq
    end
    (self_words & other_words).count
  end
end
|
Binary file
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
["page 1, line 1\npage 1, line 2\npage 1, line 3\npage 1, line 4\npage 1, line 5\fpage 2, line 1\npage 2, line 2\npage 2, line 3\npage 2, line 4\npage 2, line 5\fpage 3, line 1\npage 3, line 2\npage 3, line 3\npage 3, line 4\npage 3, line 5",[74,149,225]]
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
fixture-txt-new
|
require_relative '../../spec_helper'

describe AIPP::Downloader do
  let :fixtures_dir do
    Pathname(__FILE__).join('..', '..', '..', 'fixtures')
  end

  # Fresh storage dir per example, seeded with the fixture archive.
  let :tmp_dir do
    Pathname(Dir.mktmpdir).tap do |tmp_dir|
      (archives_dir = tmp_dir.join('archives')).mkpath
      FileUtils.cp(fixtures_dir.join('archive.zip'), archives_dir)
    end
  end

  after do
    FileUtils.rm_rf(tmp_dir)
  end

  describe :read do
    context "archive does not exist" do
      it "creates the archive" do
        Spy.on(Kernel, open: File.open(fixtures_dir.join('new.html')))
        subject = AIPP::Downloader.new(storage: tmp_dir, archive: 'new-archive') do |downloader|
          File.exist?(tmp_dir.join('work')).must_equal true
          downloader.read(document: 'new', url: 'http://localhost/new.html')
        end
        zip_entries(subject.archive_file).must_equal %w(new.html)
        subject.send(:archives_path).children.count.must_equal 2
      end
    end

    context "archive does exist" do
      it "unzips and uses the archive" do
        Spy.on(Kernel, open: File.open(fixtures_dir.join('new.html')))
        subject = AIPP::Downloader.new(storage: tmp_dir, archive: 'archive') do |downloader|
          File.exist?(tmp_dir.join('work')).must_equal true
          downloader.read(document: 'new', url: 'http://localhost/new.html').tap do |content|
            content.must_be_instance_of Nokogiri::HTML5::Document
            content.text.must_match(/fixture-html-new/)
          end
        end
        zip_entries(subject.archive_file).must_equal %w(new.html one.html two.html)
        subject.send(:archives_path).children.count.must_equal 1
      end

      it "downloads HTML documents to Nokogiri::HTML5::Document" do
        Spy.on(Kernel, open: File.open(fixtures_dir.join('new.html')))
        AIPP::Downloader.new(storage: tmp_dir, archive: 'archive') do |downloader|
          downloader.read(document: 'new', url: 'http://localhost/new.html').tap do |content|
            content.must_be_instance_of Nokogiri::HTML5::Document
            content.text.must_match(/fixture-html-new/)
          end
        end
      end

      it "downloads and caches PDF documents to AIPP::PDF" do
        Spy.on(Kernel, open: File.open(fixtures_dir.join('new.pdf')))
        AIPP::Downloader.new(storage: tmp_dir, archive: 'archive') do |downloader|
          downloader.read(document: 'new', url: 'http://localhost/new.pdf').tap do |content|
            content.must_be_instance_of AIPP::PDF
            content.text.must_match(/fixture-pdf-new/)
          end
        end
      end

      it "downloads explicitly specified type" do
        Spy.on(Kernel, open: File.open(fixtures_dir.join('new.pdf')))
        AIPP::Downloader.new(storage: tmp_dir, archive: 'archive') do |downloader|
          downloader.read(document: 'new', url: 'http://localhost/new', type: :pdf).tap do |content|
            content.must_be_instance_of AIPP::PDF
            content.text.must_match(/fixture-pdf-new/)
          end
        end
      end
    end
  end

  # List the sorted entry names of a zip archive. The block form of
  # Zip::File.open closes the archive and returns the block's value, so no
  # file handle is leaked (the previous block-less form never closed it).
  def zip_entries(zip_file)
    Zip::File.open(zip_file) do |zip|
      zip.entries.map(&:name).sort
    end
  end
end
|