rdf-microdata 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +6 -14
- data/README +5 -9
- data/VERSION +1 -1
- data/etc/doap.html +3 -2
- data/lib/rdf/microdata/reader.rb +3 -20
- data/lib/rdf/microdata/vocab.rb +0 -2
- metadata +46 -61
- data/lib/rdf/microdata/reader/rexml.rb +0 -277
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
metadata.gz: !binary |-
|
9
|
-
OTRjNzBmOTU3YzZjNzM3YmNjN2E0NGMyOWVjMzk4OWNkOWUzMDc1Y2ZjZGUw
|
10
|
-
YmU3OTMwNzcyNzE2ODVhYzQxZmRjZGI3M2VjOGUzNTZlNzJmNjAwODM3MzA1
|
11
|
-
Nzg0YTg1ZDc0MTdiNjQ5NzMyYjlkMzQ4ZDI2NGY4ZmYzNmQxZjk=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
N2QyMTMwYTlmMTVlY2M1N2ZmYzM0YzNkZTkzZWI0Yzg4MWU3MWI3OTJhMTM0
|
14
|
-
NjNlZTg3Yzg0MTAwN2Y5MzQyOWExYWRkNmEzYTE2MTk2NzlkOTdhMzg5M2Yw
|
15
|
-
MzAwNzU3MjExOTY2Nzg0Njk2M2RmODJkMDRkYTgyMTEwY2UxMGU=
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c11ca4f8b2d95abe7fcd1c55b68f667cc5ed814b
|
4
|
+
data.tar.gz: 27d2a085d5862f01a7080d364804ffc1eda971a4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a3ec50424a823fe88c0f665f34fae0c52383ab7660f8ff6c57ec7c42f40c2d44923c8c80303e4079a5e8f1694e75a72cfb9ffe0f89176a1a593392bb8233e6b9
|
7
|
+
data.tar.gz: 2b3cce242ba4902d390adeae2b24be479a86aab392717187b53a2ff7fe57c20d4914cb06419ab905a4f8dd3cb6c6796b18832db7ba65541895b5bb60e77c2005
|
data/README
CHANGED
@@ -12,7 +12,7 @@ RDF::Microdata is a Microdata reader for Ruby using the [RDF.rb][RDF.rb] library
|
|
12
12
|
RDF::Microdata parses [Microdata][] into statements or triples using the rules defined in [Microdata RDF][].
|
13
13
|
|
14
14
|
* Microdata parser.
|
15
|
-
*
|
15
|
+
* Uses Nokogiri for parsing HTML
|
16
16
|
|
17
17
|
Install with 'gem install rdf-microdata'
|
18
18
|
|
@@ -36,10 +36,11 @@ GRDDL-type triple generation, such as for html>head>title anchor tags.
|
|
36
36
|
If the `RDFa` parser is available, {RDF::Microdata::Format} will not assert content type `text/html` or file extension `.html`, as this is also asserted by RDFa. Instead, the RDFa reader will invoke the microdata reader if an `@itemscope` attribute is detected.
|
37
37
|
|
38
38
|
## Dependencies
|
39
|
-
* [RDF.rb](http://rubygems.org/gems/rdf) (>= 1.
|
40
|
-
* [RDF::XSD](http://rubygems.org/gems/rdf-xsd) (>= 1.
|
39
|
+
* [RDF.rb](http://rubygems.org/gems/rdf) (>= 1.1)
|
40
|
+
* [RDF::XSD](http://rubygems.org/gems/rdf-xsd) (>= 1.1)
|
41
|
+
* [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.5.9)
|
41
42
|
* [HTMLEntities](https://rubygems.org/gems/htmlentities) ('>= 4.3.0')
|
42
|
-
* Soft dependency on [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.5.
|
43
|
+
* Soft dependency on [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.5.9)
|
43
44
|
|
44
45
|
## Documentation
|
45
46
|
Full documentation available on [Rubydoc.info][Microdata doc]
|
@@ -49,14 +50,9 @@ Full documentation available on [Rubydoc.info][Microdata doc]
|
|
49
50
|
Asserts :html format, text/html mime-type and .html file extension.
|
50
51
|
* {RDF::Microdata::Reader}
|
51
52
|
* {RDF::Microdata::Reader::Nokogiri}
|
52
|
-
* {RDF::Microdata::Reader::REXML}
|
53
53
|
|
54
54
|
### Additional vocabularies
|
55
55
|
|
56
|
-
## TODO
|
57
|
-
* Add support for LibXML and REXML bindings, and use the best available
|
58
|
-
* Consider a SAX-based parser for improved performance
|
59
|
-
|
60
56
|
## Resources
|
61
57
|
* [RDF.rb][RDF.rb]
|
62
58
|
* [Documentation](http://rdf.rubyforge.org/microdata)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0
|
1
|
+
1.1.0
|
data/etc/doap.html
CHANGED
@@ -2,8 +2,9 @@
|
|
2
2
|
<html itemscope itemid="http://rubygems.org/gems/rdf-microdata" itemtype="http://usefulinc.com/ns/doap#Project">
|
3
3
|
<head>
|
4
4
|
<title lang="en" itemprop="shortdesc">Microdata reader for Ruby.</title>
|
5
|
+
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" >
|
5
6
|
</head>
|
6
|
-
<body
|
7
|
+
<body>
|
7
8
|
<p>Project description for <span itemprop="name">RDF::Microdata</span>.</p>
|
8
9
|
<p lang="en" itemprop="description">
|
9
10
|
RDF::Microdata is an Microdata reader for Ruby using the RDF.rb library suite.
|
@@ -13,7 +14,7 @@
|
|
13
14
|
<a itemprop="http://purl.org/dc/terms/creator developer documenter maintainer http://xmlns.com/foaf/0.1/creator" href="http://greggkellogg.net/foaf#me"
|
14
15
|
>Gregg Kellogg</a>
|
15
16
|
</dd>
|
16
|
-
<dt>Created</dt><time itemprop="created" datetime="2011-08-29"
|
17
|
+
<dt>Created</dt><dd><time itemprop="created" datetime="2011-08-29">2011-08-29</time></dd>
|
17
18
|
<dt>Blog</dt><dd><a href="http://greggkellogg.net/" itemprop="blog">http://greggkellogg.net/</a></dd>
|
18
19
|
<dt>Bug DB</dt><dd>
|
19
20
|
<a href="http://github.com/ruby-rdf/rdf-microdata/issues" itemprop="bug-database">
|
data/lib/rdf/microdata/reader.rb
CHANGED
@@ -1,9 +1,4 @@
|
|
1
|
-
|
2
|
-
raise LoadError, "not with java" if RUBY_PLATFORM == "java"
|
3
|
-
require 'nokogiri'
|
4
|
-
rescue LoadError => e
|
5
|
-
:rexml
|
6
|
-
end
|
1
|
+
require 'nokogiri'
|
7
2
|
require 'rdf/xsd'
|
8
3
|
require 'json'
|
9
4
|
|
@@ -204,8 +199,6 @@ module RDF::Microdata
|
|
204
199
|
# the input stream to read
|
205
200
|
# @param [Hash{Symbol => Object}] options
|
206
201
|
# any additional options
|
207
|
-
# @option options [Symbol] :library (:nokogiri)
|
208
|
-
# One of :nokogiri or :rexml. If nil/unspecified uses :nokogiri if available, :rexml otherwise.
|
209
202
|
# @option options [Encoding] :encoding (Encoding::UTF_8)
|
210
203
|
# the encoding of the input stream (Ruby 1.9+)
|
211
204
|
# @option options [Boolean] :validate (false)
|
@@ -231,20 +224,10 @@ module RDF::Microdata
|
|
231
224
|
@debug = options[:debug]
|
232
225
|
@vocab_expansion = options.fetch(:vocab_expansion, true)
|
233
226
|
|
234
|
-
@library =
|
235
|
-
when nil
|
236
|
-
(defined?(::Nokogiri) && RUBY_PLATFORM != 'java') ? :nokogiri : :rexml
|
237
|
-
when :nokogiri, :rexml
|
238
|
-
options[:library]
|
239
|
-
else
|
240
|
-
raise ArgumentError.new("expected :rexml or :nokogiri, but got #{options[:library].inspect}")
|
241
|
-
end
|
227
|
+
@library = :nokogiri
|
242
228
|
|
243
229
|
require "rdf/microdata/reader/#{@library}"
|
244
|
-
@implementation =
|
245
|
-
when :nokogiri then Nokogiri
|
246
|
-
when :rexml then REXML
|
247
|
-
end
|
230
|
+
@implementation = Nokogiri
|
248
231
|
self.extend(@implementation)
|
249
232
|
|
250
233
|
initialize_html(input, options) rescue raise RDF::ReaderError.new($!.message)
|
data/lib/rdf/microdata/vocab.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
module RDF
|
2
2
|
class MD < Vocabulary("http://www.w3.org/ns/md#"); end
|
3
|
-
class Schema < Vocabulary("http://schema.org/"); end
|
4
|
-
class XHV < Vocabulary("http://www.w3.org/1999/xhtml/vocab#"); end
|
5
3
|
class HCard < Vocabulary("http://microformats.org/profile/hcard#"); end
|
6
4
|
class HCalendar < Vocabulary("http://microformats.org/profile/hcalendar"); end
|
7
5
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rdf-microdata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg
|
@@ -9,120 +9,106 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-12-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rdf
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- -
|
18
|
+
- - '>='
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: 1.0
|
20
|
+
version: 1.1.0
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - '>='
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version: 1.0
|
28
|
-
- !ruby/object:Gem::Dependency
|
29
|
-
name: json
|
30
|
-
requirement: !ruby/object:Gem::Requirement
|
31
|
-
requirements:
|
32
|
-
- - ! '>='
|
33
|
-
- !ruby/object:Gem::Version
|
34
|
-
version: 1.7.7
|
35
|
-
type: :runtime
|
36
|
-
prerelease: false
|
37
|
-
version_requirements: !ruby/object:Gem::Requirement
|
38
|
-
requirements:
|
39
|
-
- - ! '>='
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
version: 1.7.7
|
27
|
+
version: 1.1.0
|
42
28
|
- !ruby/object:Gem::Dependency
|
43
29
|
name: rdf-xsd
|
44
30
|
requirement: !ruby/object:Gem::Requirement
|
45
31
|
requirements:
|
46
|
-
- -
|
32
|
+
- - '>='
|
47
33
|
- !ruby/object:Gem::Version
|
48
|
-
version:
|
34
|
+
version: 1.1.0
|
49
35
|
type: :runtime
|
50
36
|
prerelease: false
|
51
37
|
version_requirements: !ruby/object:Gem::Requirement
|
52
38
|
requirements:
|
53
|
-
- -
|
39
|
+
- - '>='
|
54
40
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
41
|
+
version: 1.1.0
|
56
42
|
- !ruby/object:Gem::Dependency
|
57
43
|
name: htmlentities
|
58
44
|
requirement: !ruby/object:Gem::Requirement
|
59
45
|
requirements:
|
60
|
-
- -
|
46
|
+
- - '>='
|
61
47
|
- !ruby/object:Gem::Version
|
62
48
|
version: 4.3.0
|
63
49
|
type: :runtime
|
64
50
|
prerelease: false
|
65
51
|
version_requirements: !ruby/object:Gem::Requirement
|
66
52
|
requirements:
|
67
|
-
- -
|
53
|
+
- - '>='
|
68
54
|
- !ruby/object:Gem::Version
|
69
55
|
version: 4.3.0
|
70
56
|
- !ruby/object:Gem::Dependency
|
71
57
|
name: nokogiri
|
72
58
|
requirement: !ruby/object:Gem::Requirement
|
73
59
|
requirements:
|
74
|
-
- -
|
60
|
+
- - '>='
|
75
61
|
- !ruby/object:Gem::Version
|
76
|
-
version: 1.
|
62
|
+
version: 1.6.0
|
77
63
|
type: :development
|
78
64
|
prerelease: false
|
79
65
|
version_requirements: !ruby/object:Gem::Requirement
|
80
66
|
requirements:
|
81
|
-
- -
|
67
|
+
- - '>='
|
82
68
|
- !ruby/object:Gem::Version
|
83
|
-
version: 1.
|
69
|
+
version: 1.6.0
|
84
70
|
- !ruby/object:Gem::Dependency
|
85
71
|
name: equivalent-xml
|
86
72
|
requirement: !ruby/object:Gem::Requirement
|
87
73
|
requirements:
|
88
|
-
- -
|
74
|
+
- - '>='
|
89
75
|
- !ruby/object:Gem::Version
|
90
76
|
version: 0.3.0
|
91
77
|
type: :development
|
92
78
|
prerelease: false
|
93
79
|
version_requirements: !ruby/object:Gem::Requirement
|
94
80
|
requirements:
|
95
|
-
- -
|
81
|
+
- - '>='
|
96
82
|
- !ruby/object:Gem::Version
|
97
83
|
version: 0.3.0
|
98
84
|
- !ruby/object:Gem::Dependency
|
99
85
|
name: open-uri-cached
|
100
86
|
requirement: !ruby/object:Gem::Requirement
|
101
87
|
requirements:
|
102
|
-
- -
|
88
|
+
- - '>='
|
103
89
|
- !ruby/object:Gem::Version
|
104
90
|
version: 0.0.5
|
105
91
|
type: :development
|
106
92
|
prerelease: false
|
107
93
|
version_requirements: !ruby/object:Gem::Requirement
|
108
94
|
requirements:
|
109
|
-
- -
|
95
|
+
- - '>='
|
110
96
|
- !ruby/object:Gem::Version
|
111
97
|
version: 0.0.5
|
112
98
|
- !ruby/object:Gem::Dependency
|
113
99
|
name: yard
|
114
100
|
requirement: !ruby/object:Gem::Requirement
|
115
101
|
requirements:
|
116
|
-
- -
|
102
|
+
- - '>='
|
117
103
|
- !ruby/object:Gem::Version
|
118
|
-
version: 0.8.
|
104
|
+
version: 0.8.7
|
119
105
|
type: :development
|
120
106
|
prerelease: false
|
121
107
|
version_requirements: !ruby/object:Gem::Requirement
|
122
108
|
requirements:
|
123
|
-
- -
|
109
|
+
- - '>='
|
124
110
|
- !ruby/object:Gem::Version
|
125
|
-
version: 0.8.
|
111
|
+
version: 0.8.7
|
126
112
|
- !ruby/object:Gem::Dependency
|
127
113
|
name: spira
|
128
114
|
requirement: !ruby/object:Gem::Requirement
|
@@ -141,72 +127,72 @@ dependencies:
|
|
141
127
|
name: rspec
|
142
128
|
requirement: !ruby/object:Gem::Requirement
|
143
129
|
requirements:
|
144
|
-
- -
|
130
|
+
- - '>='
|
145
131
|
- !ruby/object:Gem::Version
|
146
132
|
version: 2.14.0
|
147
133
|
type: :development
|
148
134
|
prerelease: false
|
149
135
|
version_requirements: !ruby/object:Gem::Requirement
|
150
136
|
requirements:
|
151
|
-
- -
|
137
|
+
- - '>='
|
152
138
|
- !ruby/object:Gem::Version
|
153
139
|
version: 2.14.0
|
154
140
|
- !ruby/object:Gem::Dependency
|
155
141
|
name: rdf-spec
|
156
142
|
requirement: !ruby/object:Gem::Requirement
|
157
143
|
requirements:
|
158
|
-
- -
|
144
|
+
- - '>='
|
159
145
|
- !ruby/object:Gem::Version
|
160
|
-
version:
|
146
|
+
version: 1.1.0
|
161
147
|
type: :development
|
162
148
|
prerelease: false
|
163
149
|
version_requirements: !ruby/object:Gem::Requirement
|
164
150
|
requirements:
|
165
|
-
- -
|
151
|
+
- - '>='
|
166
152
|
- !ruby/object:Gem::Version
|
167
|
-
version:
|
153
|
+
version: 1.1.0
|
168
154
|
- !ruby/object:Gem::Dependency
|
169
155
|
name: rdf-rdfa
|
170
156
|
requirement: !ruby/object:Gem::Requirement
|
171
157
|
requirements:
|
172
|
-
- -
|
158
|
+
- - '>='
|
173
159
|
- !ruby/object:Gem::Version
|
174
|
-
version:
|
160
|
+
version: 1.1.0
|
175
161
|
type: :development
|
176
162
|
prerelease: false
|
177
163
|
version_requirements: !ruby/object:Gem::Requirement
|
178
164
|
requirements:
|
179
|
-
- -
|
165
|
+
- - '>='
|
180
166
|
- !ruby/object:Gem::Version
|
181
|
-
version:
|
167
|
+
version: 1.1.0
|
182
168
|
- !ruby/object:Gem::Dependency
|
183
169
|
name: rdf-turtle
|
184
170
|
requirement: !ruby/object:Gem::Requirement
|
185
171
|
requirements:
|
186
|
-
- -
|
172
|
+
- - '>='
|
187
173
|
- !ruby/object:Gem::Version
|
188
|
-
version: 1.0
|
174
|
+
version: 1.1.0
|
189
175
|
type: :development
|
190
176
|
prerelease: false
|
191
177
|
version_requirements: !ruby/object:Gem::Requirement
|
192
178
|
requirements:
|
193
|
-
- -
|
179
|
+
- - '>='
|
194
180
|
- !ruby/object:Gem::Version
|
195
|
-
version: 1.0
|
181
|
+
version: 1.1.0
|
196
182
|
- !ruby/object:Gem::Dependency
|
197
183
|
name: rdf-isomorphic
|
198
184
|
requirement: !ruby/object:Gem::Requirement
|
199
185
|
requirements:
|
200
|
-
- -
|
186
|
+
- - '>='
|
201
187
|
- !ruby/object:Gem::Version
|
202
|
-
version:
|
188
|
+
version: 1.1.0
|
203
189
|
type: :development
|
204
190
|
prerelease: false
|
205
191
|
version_requirements: !ruby/object:Gem::Requirement
|
206
192
|
requirements:
|
207
|
-
- -
|
193
|
+
- - '>='
|
208
194
|
- !ruby/object:Gem::Version
|
209
|
-
version:
|
195
|
+
version: 1.1.0
|
210
196
|
description: Microdata reader for Ruby.
|
211
197
|
email: public-rdf-ruby@w3.org
|
212
198
|
executables: []
|
@@ -220,7 +206,6 @@ files:
|
|
220
206
|
- lib/rdf/microdata/expansion.rb
|
221
207
|
- lib/rdf/microdata/format.rb
|
222
208
|
- lib/rdf/microdata/reader/nokogiri.rb
|
223
|
-
- lib/rdf/microdata/reader/rexml.rb
|
224
209
|
- lib/rdf/microdata/reader.rb
|
225
210
|
- lib/rdf/microdata/version.rb
|
226
211
|
- lib/rdf/microdata/vocab.rb
|
@@ -237,17 +222,17 @@ require_paths:
|
|
237
222
|
- lib
|
238
223
|
required_ruby_version: !ruby/object:Gem::Requirement
|
239
224
|
requirements:
|
240
|
-
- -
|
225
|
+
- - '>='
|
241
226
|
- !ruby/object:Gem::Version
|
242
|
-
version: 1.
|
227
|
+
version: 1.9.2
|
243
228
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
244
229
|
requirements:
|
245
|
-
- -
|
230
|
+
- - '>='
|
246
231
|
- !ruby/object:Gem::Version
|
247
232
|
version: '0'
|
248
233
|
requirements: []
|
249
234
|
rubyforge_project: rdf-microdata
|
250
|
-
rubygems_version: 2.
|
235
|
+
rubygems_version: 2.1.11
|
251
236
|
signing_key:
|
252
237
|
specification_version: 4
|
253
238
|
summary: Microdata reader for Ruby.
|
@@ -1,277 +0,0 @@
|
|
1
|
-
require 'htmlentities'
|
2
|
-
|
3
|
-
module RDF::Microdata
|
4
|
-
class Reader < RDF::Reader
|
5
|
-
##
|
6
|
-
# REXML implementation of an HTML parser.
|
7
|
-
#
|
8
|
-
# @see http://www.germane-software.com/software/rexml/
|
9
|
-
module REXML
|
10
|
-
##
|
11
|
-
# Returns the name of the underlying XML library.
|
12
|
-
#
|
13
|
-
# @return [Symbol]
|
14
|
-
def self.library
|
15
|
-
:rexml
|
16
|
-
end
|
17
|
-
|
18
|
-
# Proxy class to implement uniform element accessors
|
19
|
-
class NodeProxy
|
20
|
-
attr_reader :node
|
21
|
-
attr_reader :parent
|
22
|
-
|
23
|
-
def initialize(node, parent = nil)
|
24
|
-
@node = node
|
25
|
-
@parent = parent
|
26
|
-
end
|
27
|
-
|
28
|
-
##
|
29
|
-
# Element language
|
30
|
-
#
|
31
|
-
# From HTML5 3.2.3.3
|
32
|
-
# If both the lang attribute in no namespace and the lang attribute in the XML namespace are set
|
33
|
-
# on an element, user agents must use the lang attribute in the XML namespace, and the lang
|
34
|
-
# attribute in no namespace must be ignored for the purposes of determining the element's
|
35
|
-
# language.
|
36
|
-
#
|
37
|
-
# @return [String]
|
38
|
-
def language
|
39
|
-
language = case
|
40
|
-
when @node.attribute("lang")
|
41
|
-
@node.attribute("lang").to_s
|
42
|
-
else
|
43
|
-
parent && parent.element? && parent.language
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
##
|
48
|
-
# Return xml:base on element, if defined
|
49
|
-
#
|
50
|
-
# @return [String]
|
51
|
-
def base
|
52
|
-
if @base.nil?
|
53
|
-
@base = attributes['xml:base'] ||
|
54
|
-
(parent && parent.element? && parent.base) ||
|
55
|
-
false
|
56
|
-
end
|
57
|
-
|
58
|
-
@base == false ? nil : @base
|
59
|
-
end
|
60
|
-
|
61
|
-
def display_path
|
62
|
-
@display_path ||= begin
|
63
|
-
path = []
|
64
|
-
path << parent.display_path if parent
|
65
|
-
path << @node.name
|
66
|
-
case @node
|
67
|
-
when ::REXML::Element then path.join("/")
|
68
|
-
when ::REXML::Attribute then path.join("@")
|
69
|
-
else path.join("?")
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
##
|
75
|
-
# Return true of all child elements are text
|
76
|
-
#
|
77
|
-
# @return [Array<:text, :element, :attribute>]
|
78
|
-
def text_content?
|
79
|
-
@node.children.all? {|c| c.is_a?(::REXML::Text)}
|
80
|
-
end
|
81
|
-
|
82
|
-
##
|
83
|
-
# Retrieve XMLNS definitions for this element
|
84
|
-
#
|
85
|
-
# @return [Hash{String => String}]
|
86
|
-
def namespaces
|
87
|
-
ns_decls = {}
|
88
|
-
@node.attributes.each do |name, attr|
|
89
|
-
next unless name =~ /^xmlns(?:\:(.+))?/
|
90
|
-
ns_decls[$1] = attr
|
91
|
-
end
|
92
|
-
ns_decls
|
93
|
-
end
|
94
|
-
|
95
|
-
##
|
96
|
-
# Children of this node
|
97
|
-
#
|
98
|
-
# @return [NodeSetProxy]
|
99
|
-
def children
|
100
|
-
NodeSetProxy.new(@node.children, self)
|
101
|
-
end
|
102
|
-
|
103
|
-
##
|
104
|
-
# Elements of this node
|
105
|
-
#
|
106
|
-
# @return [NodeSetProxy]
|
107
|
-
def elements
|
108
|
-
NodeSetProxy.new(@node.children.select {|c| c.is_a?(::REXML::Element)}, self)
|
109
|
-
end
|
110
|
-
|
111
|
-
##
|
112
|
-
# Inner text of an element
|
113
|
-
#
|
114
|
-
# @see http://apidock.com/ruby/REXML/Element/get_text#743-Get-all-inner-texts
|
115
|
-
# @return [String]
|
116
|
-
def inner_text
|
117
|
-
coder = HTMLEntities.new
|
118
|
-
::REXML::XPath.match(@node,'.//text()').map { |e|
|
119
|
-
coder.decode(e)
|
120
|
-
}.join
|
121
|
-
end
|
122
|
-
|
123
|
-
##
|
124
|
-
# Inner text of an element
|
125
|
-
#
|
126
|
-
# @see http://apidock.com/ruby/REXML/Element/get_text#743-Get-all-inner-texts
|
127
|
-
# @return [String]
|
128
|
-
def inner_html
|
129
|
-
@node.children.map(&:to_s).join
|
130
|
-
end
|
131
|
-
|
132
|
-
##
|
133
|
-
# Node type accessors
|
134
|
-
#
|
135
|
-
# @return [Boolean]
|
136
|
-
def element?
|
137
|
-
@node.is_a?(::REXML::Element)
|
138
|
-
end
|
139
|
-
|
140
|
-
def has_attribute?(attr)
|
141
|
-
!!node.attribute(attr)
|
142
|
-
end
|
143
|
-
|
144
|
-
##
|
145
|
-
# Proxy for everything else to @node
|
146
|
-
def method_missing(method, *args)
|
147
|
-
@node.send(method, *args)
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
|
-
##
|
152
|
-
# NodeSet proxy
|
153
|
-
class NodeSetProxy
|
154
|
-
attr_reader :node_set
|
155
|
-
attr_reader :parent
|
156
|
-
|
157
|
-
def initialize(node_set, parent)
|
158
|
-
@node_set = node_set
|
159
|
-
@parent = parent
|
160
|
-
end
|
161
|
-
|
162
|
-
##
|
163
|
-
# Return a proxy for each child
|
164
|
-
#
|
165
|
-
# @yield child
|
166
|
-
# @yieldparam [NodeProxy] child
|
167
|
-
def each
|
168
|
-
@node_set.each do |c|
|
169
|
-
yield NodeProxy.new(c, parent)
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
##
|
174
|
-
# Return proxy for first element and remove it
|
175
|
-
# @return [NodeProxy]
|
176
|
-
def shift
|
177
|
-
(e = node_set.shift) && NodeProxy.new(e, parent)
|
178
|
-
end
|
179
|
-
|
180
|
-
##
|
181
|
-
# Add NodeSetProxys
|
182
|
-
# @param [NodeSetProxy, REXML::Element] other
|
183
|
-
# @return [NodeSetProxy]
|
184
|
-
def +(other)
|
185
|
-
new_ns = node_set.clone
|
186
|
-
other.node_set.each {|n| new_ns << n}
|
187
|
-
NodeSetProxy.new(new_ns, parent)
|
188
|
-
end
|
189
|
-
|
190
|
-
##
|
191
|
-
# Add a NodeProxy
|
192
|
-
# @param [NodeProxy, REXML::Element] elem
|
193
|
-
# @return [NodeSetProxy]
|
194
|
-
def <<(elem)
|
195
|
-
node_set << (elem.is_a?(NodeProxy) ? elem.node : elem)
|
196
|
-
self
|
197
|
-
end
|
198
|
-
|
199
|
-
def inspect
|
200
|
-
@node_set.map {|c| NodeProxy.new(c, parent).display_path}.inspect
|
201
|
-
end
|
202
|
-
|
203
|
-
##
|
204
|
-
# Proxy for everything else to @node_set
|
205
|
-
def method_missing(method, *args)
|
206
|
-
@node_set.send(method, *args)
|
207
|
-
end
|
208
|
-
end
|
209
|
-
|
210
|
-
##
|
211
|
-
# Initializes the underlying XML library.
|
212
|
-
#
|
213
|
-
# @param [Hash{Symbol => Object}] options
|
214
|
-
# @return [void]
|
215
|
-
def initialize_html(input, options = {})
|
216
|
-
require 'rexml/document' unless defined?(::REXML)
|
217
|
-
@doc = case input
|
218
|
-
when ::REXML::Document
|
219
|
-
input
|
220
|
-
else
|
221
|
-
# Try to detect charset from input
|
222
|
-
options[:encoding] ||= input.charset if input.respond_to?(:charset)
|
223
|
-
|
224
|
-
# Otherwise, default is utf-8
|
225
|
-
options[:encoding] ||= 'utf-8'
|
226
|
-
|
227
|
-
# Set xml:base for the document element, if defined
|
228
|
-
@base_uri = base_uri ? base_uri.to_s : nil
|
229
|
-
|
230
|
-
# Only parse as XML, no HTML mode
|
231
|
-
doc = ::REXML::Document.new(input.respond_to?(:read) ? input.read : input.to_s)
|
232
|
-
end
|
233
|
-
end
|
234
|
-
|
235
|
-
# Accessor methods to mask native elements & attributes
|
236
|
-
|
237
|
-
##
|
238
|
-
# Return proxy for document root
|
239
|
-
def root
|
240
|
-
@root ||= NodeProxy.new(@doc.root) if @doc && @doc.root
|
241
|
-
end
|
242
|
-
|
243
|
-
##
|
244
|
-
# Document errors
|
245
|
-
def doc_errors
|
246
|
-
[]
|
247
|
-
end
|
248
|
-
|
249
|
-
##
|
250
|
-
# Find value of document base
|
251
|
-
#
|
252
|
-
# @param [String] base Existing base from URI or :base_uri
|
253
|
-
# @return [String]
|
254
|
-
def doc_base(base)
|
255
|
-
# find if the document has a base element
|
256
|
-
base_el = ::REXML::XPath.first(@doc, "/html/head/base")
|
257
|
-
base = base_el.attribute("href").to_s.split("#").first if base_el
|
258
|
-
|
259
|
-
base || @base_uri
|
260
|
-
end
|
261
|
-
|
262
|
-
##
|
263
|
-
# Based on Microdata element.getItems
|
264
|
-
#
|
265
|
-
# @see http://www.w3.org/TR/2011/WD-microdata-20110525/#top-level-microdata-items
|
266
|
-
def getItems
|
267
|
-
::REXML::XPath.match(@doc, "//[@itemscope]").select {|el| !el.attribute('itemprop')}.map {|n| NodeProxy.new(n)}
|
268
|
-
end
|
269
|
-
|
270
|
-
##
|
271
|
-
# Look up an element in the document by id
|
272
|
-
def find_element_by_id(id)
|
273
|
-
(e = ::REXML::XPath.first(@doc, "//[@id='#{id}']")) && NodeProxy.new(e)
|
274
|
-
end
|
275
|
-
end
|
276
|
-
end
|
277
|
-
end
|