scapeshift 1.0.1rg0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +36 -0
- data/.yardopts +1 -0
- data/Gemfile +18 -0
- data/Gemfile.lock +29 -0
- data/LICENSE +20 -0
- data/README.md +76 -0
- data/Rakefile +64 -0
- data/VERSION +1 -0
- data/lib/scapeshift.rb +9 -0
- data/lib/scapeshift/card.rb +404 -0
- data/lib/scapeshift/crawler.rb +64 -0
- data/lib/scapeshift/crawlers.rb +20 -0
- data/lib/scapeshift/crawlers/base.rb +107 -0
- data/lib/scapeshift/crawlers/cards.rb +331 -0
- data/lib/scapeshift/crawlers/meta.rb +136 -0
- data/lib/scapeshift/crawlers/single.rb +404 -0
- data/lib/scapeshift/errors.rb +85 -0
- data/scapeshift.gemspec +78 -0
- data/test/helper.rb +16 -0
- data/test/test_base_crawler.rb +48 -0
- data/test/test_card.rb +80 -0
- data/test/test_card_crawler.rb +92 -0
- data/test/test_crawler_main.rb +16 -0
- data/test/test_meta_crawler.rb +78 -0
- data/test/test_single_crawler.rb +189 -0
- metadata +102 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
module Scapeshift
|
6
|
+
module Crawlers
|
7
|
+
|
8
|
+
##
|
9
|
+
# The Meta crawler scrapes meta data such as expansion sets and formats
|
10
|
+
# from the Oracle main search page. Like the other Crawlers, it overrides
|
11
|
+
# the {#crawl} method from {Base}.
|
12
|
+
#
|
13
|
+
# @example Directly instantiating the crawler
|
14
|
+
# crawler = Scapeshift::Crawlers::Meta.new :type => :sets
|
15
|
+
# @sets = crawler.crawl
|
16
|
+
#
|
17
|
+
# @author Josh Lindsey
|
18
|
+
#
|
19
|
+
# @since 0.1.4
|
20
|
+
#
|
21
|
+
class Meta < Base
|
22
|
+
has_callback_hook :before_scrape
|
23
|
+
has_callback_hook :after_scrape
|
24
|
+
|
25
|
+
## @return [Nokogiri::HTML::Document] The Nokogiri document representing the page
|
26
|
+
attr_reader :doc
|
27
|
+
|
28
|
+
## @return [SortedSet <String>] The SortedSet containing the scraped data
|
29
|
+
attr_reader :meta
|
30
|
+
|
31
|
+
## The Oracle homepage, which is what we are scraping from
|
32
|
+
Meta_URI = 'http://gatherer.wizards.com/Pages/Default.aspx'
|
33
|
+
|
34
|
+
##
# Builds a Meta crawler and prepares the empty container that {#crawl}
# fills in.
#
# @param [Hash] opts Options for specifying the metadata to scrape
# @option opts [Symbol (:sets|:formats|:types)] :type ('') The type of metadata to scrape
#
# @return [Scapeshift::Crawlers::Meta] The Meta crawler object
#
# @raise [Scapeshift::Errors::InsufficientOptions] If :type isn't passed
#
# @author Josh Lindsey
#
# @since 0.3.0
#
def initialize(opts = {})
  super(opts)

  # NOTE(review): SortedSet left the stdlib `set` library in Ruby 3.0 --
  # confirm the sorted_set gem is available on newer interpreters.
  @meta = SortedSet.new

  return unless options[:type].nil?

  raise Scapeshift::Errors::InsufficientOptions.new("This crawler MUST be passed :type")
end
|
57
|
+
|
58
|
+
##
# Scrapes the Oracle homepage for the specified data. Overridden from
# {Base#crawl}.
#
# Fires the +:before_scrape+ hook with the parsed document before any
# scraping happens and the +:after_scrape+ hook with the collected
# metadata afterwards.
#
# @return [SortedSet <String>] A SortedSet containing the data
#
# @raise [Scapeshift::Errors::UnknownMetaType] If an unsupported metadata type is supplied
#
# @author Josh Lindsey
#
# @since 0.1.0
#
def crawl
  # Kernel#open stopped accepting URLs in Ruby 3.0, so go through URI.open
  # whenever open-uri provides it and keep the old call for older Rubies.
  # The block form closes the downloaded body once it has been parsed.
  @doc = if URI.respond_to?(:open)
           URI.open(Meta_URI) { |page| Nokogiri::HTML page }
         else
           open(Meta_URI) { |page| Nokogiri::HTML page }
         end

  self.hook :before_scrape, @doc

  case @options[:type]
  when :sets
    _scrape_sets @doc
  when :formats
    _scrape_formats @doc
  when :types
    _scrape_types @doc
  else
    raise Scapeshift::Errors::UnknownMetaType.new "Unknown metadata type: '#{options[:type]}'"
  end

  self.hook :after_scrape, @meta

  @meta
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
93
|
+
##
# Scrapes the expansion set data from the document and adds every
# non-empty option value to +@meta+.
#
# Children of the select element that carry no +value+ attribute are
# skipped instead of raising on +nil+.
#
# @param [Nokogiri::HTML::Document] doc The full document of the Oracle page
#
# @author Josh Lindsey
#
# @since 0.1.4
#
def _scrape_sets doc
  sets = doc.css 'select#ctl00_ctl00_MainContent_Content_SearchControls_setAddText'
  sets.children.each do |set|
    value = set['value']
    # A child without a value attribute yields nil here; the blank
    # placeholder option yields an empty string. Neither is a set name.
    next if value.nil? || value.empty?

    @meta << value
  end
end
|
106
|
+
|
107
|
+
##
# Scrapes the Format data from the document and adds every option value
# to +@meta+.
#
# Children of the select element that carry no +value+ attribute are
# skipped so that +nil+ never ends up in the result. Empty-string values
# are still added, exactly as before.
#
# @param [Nokogiri::HTML::Document] doc The full document of the Oracle page
#
# @author Josh Lindsey
#
# @since 0.1.4
#
def _scrape_formats doc
  formats = doc.css 'select#ctl00_ctl00_MainContent_Content_SearchControls_formatAddText'
  formats.children.each do |format|
    value = format['value']
    @meta << value unless value.nil?
  end
end
|
120
|
+
|
121
|
+
##
# Scrapes the card types data from the document and adds every option
# value to +@meta+.
#
# Children of the select element that carry no +value+ attribute are
# skipped so that +nil+ never ends up in the result. Empty-string values
# are still added, exactly as before.
#
# @param [Nokogiri::HTML::Document] doc The full document of the Oracle page
#
# @author Josh Lindsey
#
# @since 0.1.4
#
def _scrape_types doc
  types = doc.css 'select#ctl00_ctl00_MainContent_Content_SearchControls_typeAddText'
  types.children.each do |type|
    value = type['value']
    @meta << value unless value.nil?
  end
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
@@ -0,0 +1,404 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
module Scapeshift
|
6
|
+
module Crawlers
|
7
|
+
|
8
|
+
##
|
9
|
+
# Scrapes the Oracle card detail page for a single card. Like
|
10
|
+
# the other Crawlers, it overrides the {#crawl} method from {Base}.
|
11
|
+
#
|
12
|
+
# @example Directly instantiating the crawler
|
13
|
+
# crawler = Scapeshift::Crawlers::Single.new :name => 'Counterspell'
|
14
|
+
# @card = crawler.crawl
|
15
|
+
#
|
16
|
+
# @todo Add support for scraping Planechase Plane cards.
|
17
|
+
#
|
18
|
+
# @author Josh Lindsey
|
19
|
+
#
|
20
|
+
# @since 0.2.0
|
21
|
+
#
|
22
|
+
class Single < Base
|
23
|
+
has_callback_hook :before_scrape
|
24
|
+
has_callback_hook :after_scrape
|
25
|
+
has_callback_hook :every_attr
|
26
|
+
|
27
|
+
## The details page for cards by multiverse id. Joined with a card's multiverse id.
|
28
|
+
Card_Multiverse_ID_Search_URI = 'http://gatherer.wizards.com/Pages/Card/Details.aspx?multiverseid='
|
29
|
+
|
30
|
+
## The base search page for card names. Joined to {Card_Name_Frag}.
|
31
|
+
Card_Name_Search_URI = 'http://gatherer.wizards.com/Pages/Search/Default.aspx?name='
|
32
|
+
|
33
|
+
## The search fragment for each word in the name. Interpolated
|
34
|
+
## with each word in the Card name.
|
35
|
+
Card_Name_Frag = '+[%s]'
|
36
|
+
|
37
|
+
## @return [Scapeshift::Card] The {Card} object representing the scraped data
|
38
|
+
attr_reader :card
|
39
|
+
|
40
|
+
## @return [Nokogiri::HTML::Document] The Nokogiri document representing the card detail page
|
41
|
+
attr_reader :doc
|
42
|
+
|
43
|
+
##
# Creates a new Single crawler object.
#
# The card to scrape is identified by +:name+ or by +:multiverse_id+; at
# least one of the two must be supplied. When both are given, {#crawl}
# looks the card up by +:multiverse_id+.
#
# @param [Hash] opts Options hash
# @option opts [String] :name ('') The name of the card to scrape
# @option opts [#to_s] :multiverse_id The multiverse id of the card to scrape
#
# @return [Scapeshift::Crawlers::Single] The Single crawler object
#
# @raise [Scapeshift::Errors::InsufficientOptions] If neither :name nor
#   :multiverse_id is passed
#
# @author Josh Lindsey
#
# @since 0.3.0
#
def initialize(opts = {})
  super opts

  @card = Scapeshift::Card.new

  if self.options[:name].nil? && self.options[:multiverse_id].nil?
    raise Scapeshift::Errors::InsufficientOptions.new "This crawler MUST be passed one of :name or :multiverse_id"
  end
end
|
66
|
+
|
67
|
+
##
# Scrapes the Oracle card detail page for the requested card. The card is
# looked up by +:multiverse_id+ when that option is present and by
# +:name+ otherwise. Overrides the {Base#crawl} method.
#
# Fires +:before_scrape+ with the parsed document, +:every_attr+ with the
# card after each attribute is assigned, and +:after_scrape+ with the
# finished card.
#
# @return [Scapeshift::Card] The Card containing the scraped data
#
# @raise [Scapeshift::Errors::CardNameAmbiguousOrNotFound]
#   If instead of being redirected to the Card detail page, this crawler
#   finds itself on a search results page.
#
# @author Josh Lindsey
#
# @since 0.2.0
#
def crawl
  uri_str = if not self.options[:multiverse_id].nil?
    Card_Multiverse_ID_Search_URI + self.options[:multiverse_id].to_s
  elsif not self.options[:name].nil?
    self.options[:name].split(' ').inject(Card_Name_Search_URI) { |memo, word| memo + (Card_Name_Frag % word) }
  end

  # URI.escape was removed in Ruby 3.0. Where the default parser exists,
  # call its #escape directly; fall back to URI.escape on Rubies that
  # predate URI::DEFAULT_PARSER.
  escaped = if defined?(URI::DEFAULT_PARSER)
              URI::DEFAULT_PARSER.escape(uri_str)
            else
              URI.escape(uri_str)
            end

  # Kernel#open stopped accepting URLs in Ruby 3.0, so prefer URI.open.
  # The block form closes the downloaded body once it has been parsed.
  @doc = if URI.respond_to?(:open)
           URI.open(escaped) { |page| Nokogiri::HTML page }
         else
           open(escaped) { |page| Nokogiri::HTML page }
         end

  self.hook :before_scrape, @doc

  # Check to make sure we're actually on the card detail page.
  unless doc.css('div.filterList').empty?
    # Report whichever identifier was used for the lookup, so a failed
    # multiverse id search does not print an empty name.
    wanted = options[:name] || options[:multiverse_id]
    raise Scapeshift::Errors::CardNameAmbiguousOrNotFound.new "Unable to find card: '#{wanted}'"
  end

  @card.name = _parse_name doc
  self.hook :every_attr, @card

  @card.cost = _parse_cost doc
  self.hook :every_attr, @card

  @card.types = _parse_types doc
  self.hook :every_attr, @card

  @card.text = _parse_text doc
  self.hook :every_attr, @card

  @card.flavour_text = _parse_flavour_text doc
  self.hook :every_attr, @card

  @card.sets = _parse_sets doc
  self.hook :every_attr, @card

  @card.pow_tgh = _parse_pow_tgh doc
  self.hook :every_attr, @card

  @card.loyalty = _parse_loyalty doc
  self.hook :every_attr, @card

  @card.artist = _parse_artist doc
  self.hook :every_attr, @card

  @card.multiverse_id = _parse_multiverse_id doc
  self.hook :every_attr, @card

  # The image URI is derived from the multiverse id scraped just above.
  @card.image_uri_from_id = @card.multiverse_id
  self.hook :every_attr, @card

  @card.number = _parse_number doc
  self.hook :every_attr, @card

  self.hook :after_scrape, @card

  @card
end
|
137
|
+
|
138
|
+
private
|
139
|
+
|
140
|
+
##
# Reads the card name out of the name row of the detail page.
#
# @param [Nokogiri::HTML::Document] doc The detail page document
#
# @return [String] The card's name, stripped of surrounding whitespace
#
# @author Josh Lindsey
#
# @since 0.2.0
#
def _parse_name doc
  name_cell = doc.css 'div#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_nameRow/div[2]'
  leading_node = name_cell.children.first
  leading_node.to_s.strip
end
|
155
|
+
|
156
|
+
##
# Builds the card's mana cost by converting each mana image in the mana
# row to its symbol and concatenating the results in page order.
#
# @param [Nokogiri::HTML::Document] doc The detail page document
#
# @return [String] The formatted string representation of the card's cost.
#   (eg. "2BU")
#
# @see Scapeshift::Card.cost_symbol_from_str
#
# @author Josh Lindsey
#
# @since 0.2.0
#
def _parse_cost doc
  mana_images = doc.css('div#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_manaRow/div[2]/img')
  mana_images.inject('') do |symbols, image|
    symbols << Scapeshift::Card.cost_symbol_from_str(image['alt'])
  end
end
|
176
|
+
|
177
|
+
##
# Reads the raw type line out of the type row of the detail page.
#
# @param [Nokogiri::HTML::Document] doc The detail page document
#
# @return [String] The types line string, stripped of surrounding whitespace
#
# @see Scapeshift::Card#types=
#
# @author Josh Lindsey
#
# @since 0.2.0
#
def _parse_types doc
  selector = 'div#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_typeRow/div[2]'
  type_line = doc.css(selector).children.first
  type_line.to_s.strip
end
|
194
|
+
|
195
|
+
##
# Scrape the card's flavour text from the detail page.
#
# The text boxes inside the flavour text element are walked with
# {#_recursive_parse_text} and the accumulated string is stripped.
#
# @param [Nokogiri::HTML::Document] doc The detail page document
#
# @return [String] The flavour text
#
# @see #_recursive_parse_text
#
# @author Eric Cohen
#
# @since 1.0.1
#
def _parse_flavour_text doc
  flavour_text = ''
  blocks = doc.css('div#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_FlavorText/div[@class=cardtextbox]')
  _recursive_parse_text blocks, 0, nil, flavour_text
  flavour_text.strip
end
|
214
|
+
|
215
|
+
##
# Scrape the card's rules text from the detail page.
#
# The text boxes inside the text row are walked with
# {#_recursive_parse_text} and the accumulated string is stripped.
#
# @param [Nokogiri::HTML::Document] doc The detail page document
#
# @return [String] The rules text
#
# @see #_recursive_parse_text
#
# @author Josh Lindsey
#
# @since 0.2.0
#
def _parse_text doc
  text = ''
  blocks = doc.css('div#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_textRow/div[2]/div[@class=cardtextbox]')
  _recursive_parse_text blocks, 0, nil, text
  text.strip
end
|
234
|
+
|
235
|
+
##
# Scrapes the printings (sets and rarities) of the card.
#
# The printing shown in the set row comes first, followed by the printings
# in the other-sets row. Each image title is expected to look like
# "Set Name (Rarity)". A missing set-row image or a title that does not
# have that shape is skipped rather than raising or yielding a pair of
# nils.
#
# @param [Nokogiri::HTML::Document] doc The detail page document
#
# @return [Array] The array of [set, rarity] pairs
#
# @author Josh Lindsey
#
# @since 0.2.0
#
def _parse_sets doc
  regex = /^(.*?) \((.*?)\)$/

  current = doc.css('div#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_setRow').search('img').first
  others = doc.css('div#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_otherSetsRow').search('img')

  sets_ary = []
  ([current] + others.to_a).each do |image|
    # current is nil when the set row holds no image at all.
    next if image.nil?

    printing = regex.match image['title'].to_s
    sets_ary << [printing[1], printing[2]] if printing
  end

  sets_ary
end
|
262
|
+
|
263
|
+
##
# Scrapes the card's Power and Toughness (if a creature card).
#
# {#_parse_loyalty} reads the very same row, so the row being present does
# not by itself mean the card has a power and toughness. When the row's
# text is not of the form "power / toughness", nil is returned instead of
# a pair of nils.
#
# @param [Nokogiri::HTML::Document] doc The detail page document
#
# @return [Array] The power and toughness
# @return [nil] If it's not a creature
#
# @author Josh Lindsey
#
# @since 0.2.0
#
def _parse_pow_tgh doc
  pt_row = doc.css('div#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_ptRow')
  return nil if pt_row.empty?

  pt_str = pt_row.search('div[2]').children.first.to_s.strip
  stats = /^(.*?) \/ (.*?)$/.match pt_str
  stats && [stats[1], stats[2]]
end
|
283
|
+
|
284
|
+
##
# Scrapes the card's loyalty (if a planeswalker card).
#
# The loyalty is read from the same row {#_parse_pow_tgh} uses. Only a
# value made up entirely of digits counts as a loyalty; anything else,
# including an empty cell, yields nil.
#
# @param [Nokogiri::HTML::Document] doc The detail page document
#
# @return [String] The card's loyalty
# @return [nil] If it's not a planeswalker
#
# @author Eric Cohen
#
# @since 1.0.1
#
def _parse_loyalty doc
  loyalty_row = doc.css('div#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_ptRow')
  return nil if loyalty_row.empty?

  loyalty = loyalty_row.search('div[2]').children.first.to_s.strip
  # Require at least one digit so an empty cell yields nil, not "".
  digits = /^([0-9]+)$/.match loyalty
  digits && digits[1]
end
|
304
|
+
|
305
|
+
##
# Reads the artist's name from the link inside the artist credit element.
#
# @param [Nokogiri::HTML::Document] doc The detail page document
#
# @return [String] The card's Artist, stripped of surrounding whitespace
#
# @author Eric Cohen
#
# @since 1.0.1
#
def _parse_artist doc
  credit_link = doc.css 'div#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_ArtistCredit/a'
  credit = credit_link.children.first
  credit.to_s.strip
end
|
320
|
+
|
321
|
+
##
# Scrapes the multiverse ID of this card so the Card object can
# interpolate it into the image URI.
#
# The ID is taken from the +multiverseid+ query parameter of the card
# image's +src+, wherever in the query string it appears.
#
# @param [Nokogiri::HTML::Document] doc The detail page document
#
# @return [String] The multiverse ID of this card
# @return [nil] If there is no card image or its src has no multiverseid
#
# @author Josh Lindsey
#
# @since 0.2.0
#
def _parse_multiverse_id doc
  image = doc.css('img#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_cardImage').first
  return nil if image.nil?

  # Capture up to the next '&' or the end of the string, so the parameter
  # is found even when it is the last one in the query.
  found = /multiverseid=([^&]*)/.match image['src'].to_s
  found && found[1]
end
|
338
|
+
|
339
|
+
##
# Scrapes the card number of this card from the value element of the
# number row.
#
# @param [Nokogiri::HTML::Document] doc The detail page document
#
# @return [String] The card number of this card; an empty string when the
#   page has no number row
#
# @author Eric Cohen
#
# @since 1.0.1
#
def _parse_number doc
  doc.css('div#ctl00_ctl00_ctl00_MainContent_SubContent_SubContent_numberRow .value').
    children.first.to_s.strip
end
|
354
|
+
|
355
|
+
##
# Walks a list of nodes from the detail page and appends their textual
# content to +text+, descending into text-box divs and italic elements and
# turning mana images into symbols. Used by {#_parse_text} and
# {#_parse_flavour_text}.
#
# Siblings are visited in order starting at +pos+; nested nodes are handled
# by calling this method again on their children.
#
# @param [Array] node_ary The array of nodes for the current level
# @param [Integer] pos The position in node_ary to start from
# @param [Symbol] last_element The last element traversed, used for formatting
# @param [String] text The string being built; it is appended to in place
#
# @see #_parse_text
# @see Scapeshift::Card.cost_symbol_from_str
#
# @author Josh Lindsey
#
# @since 0.2.0
#
def _recursive_parse_text node_ary, pos, last_element, text
  index = pos
  previous = last_element

  until (node = node_ary[index]).nil?
    if node.is_a? Nokogiri::XML::Text
      # Regular text: separated from a preceding mana symbol by one space
      text << ' ' if previous == :img
      text << node.to_s.strip
      previous = :text
    elsif node.is_a? Nokogiri::XML::Element
      if node['class'] == 'cardtextbox'
        # Text holder div: each one starts on a new line
        text << "\n"
        _recursive_parse_text node.children, 0, :div, text
      elsif node.name == 'img'
        # Mana image: consecutive symbols are written without a gap
        text << ' ' unless previous == :img
        text << Scapeshift::Card.cost_symbol_from_str(node['alt'])
        previous = :img
      elsif node.name == 'i'
        # Keyword text
        text << ' ' if previous == :img
        _recursive_parse_text node.children, 0, :i, text
      end
    end

    index += 1
  end
end
|
401
|
+
end
|
402
|
+
end
|
403
|
+
end
|
404
|
+
|