rdf-microdata 1.0.3 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +6 -14
- data/README +5 -9
- data/VERSION +1 -1
- data/etc/doap.html +3 -2
- data/lib/rdf/microdata/reader.rb +3 -20
- data/lib/rdf/microdata/vocab.rb +0 -2
- metadata +46 -61
- data/lib/rdf/microdata/reader/rexml.rb +0 -277
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
metadata.gz: !binary |-
|
9
|
-
OTRjNzBmOTU3YzZjNzM3YmNjN2E0NGMyOWVjMzk4OWNkOWUzMDc1Y2ZjZGUw
|
10
|
-
YmU3OTMwNzcyNzE2ODVhYzQxZmRjZGI3M2VjOGUzNTZlNzJmNjAwODM3MzA1
|
11
|
-
Nzg0YTg1ZDc0MTdiNjQ5NzMyYjlkMzQ4ZDI2NGY4ZmYzNmQxZjk=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
N2QyMTMwYTlmMTVlY2M1N2ZmYzM0YzNkZTkzZWI0Yzg4MWU3MWI3OTJhMTM0
|
14
|
-
NjNlZTg3Yzg0MTAwN2Y5MzQyOWExYWRkNmEzYTE2MTk2NzlkOTdhMzg5M2Yw
|
15
|
-
MzAwNzU3MjExOTY2Nzg0Njk2M2RmODJkMDRkYTgyMTEwY2UxMGU=
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c11ca4f8b2d95abe7fcd1c55b68f667cc5ed814b
|
4
|
+
data.tar.gz: 27d2a085d5862f01a7080d364804ffc1eda971a4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a3ec50424a823fe88c0f665f34fae0c52383ab7660f8ff6c57ec7c42f40c2d44923c8c80303e4079a5e8f1694e75a72cfb9ffe0f89176a1a593392bb8233e6b9
|
7
|
+
data.tar.gz: 2b3cce242ba4902d390adeae2b24be479a86aab392717187b53a2ff7fe57c20d4914cb06419ab905a4f8dd3cb6c6796b18832db7ba65541895b5bb60e77c2005
|
data/README
CHANGED
@@ -12,7 +12,7 @@ RDF::Microdata is a Microdata reader for Ruby using the [RDF.rb][RDF.rb] library
|
|
12
12
|
RDF::Microdata parses [Microdata][] into statements or triples using the rules defined in [Microdata RDF][].
|
13
13
|
|
14
14
|
* Microdata parser.
|
15
|
-
*
|
15
|
+
* Uses Nokogiri for parsing HTML
|
16
16
|
|
17
17
|
Install with 'gem install rdf-microdata'
|
18
18
|
|
@@ -36,10 +36,11 @@ GRDDL-type triple generation, such as for html>head>title anchor tags.
|
|
36
36
|
If the `RDFa` parser is available, {RDF::Microdata::Format} will not assert content type `text/html` or file extension `.html`, as this is also asserted by RDFa. Instead, the RDFa reader will invoke the microdata reader if an `@itemscope` attribute is detected.
|
37
37
|
|
38
38
|
## Dependencies
|
39
|
-
* [RDF.rb](http://rubygems.org/gems/rdf) (>= 1.
|
40
|
-
* [RDF::XSD](http://rubygems.org/gems/rdf-xsd) (>= 1.
|
39
|
+
* [RDF.rb](http://rubygems.org/gems/rdf) (>= 1.1)
|
40
|
+
* [RDF::XSD](http://rubygems.org/gems/rdf-xsd) (>= 1.1)
|
41
|
+
* [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.5.9)
|
41
42
|
* [HTMLEntities](https://rubygems.org/gems/htmlentities) ('>= 4.3.0')
|
42
|
-
* Soft dependency on [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.5.
|
43
|
+
* Soft dependency on [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.5.9)
|
43
44
|
|
44
45
|
## Documentation
|
45
46
|
Full documentation available on [Rubydoc.info][Microdata doc]
|
@@ -49,14 +50,9 @@ Full documentation available on [Rubydoc.info][Microdata doc]
|
|
49
50
|
Asserts :html format, text/html mime-type and .html file extension.
|
50
51
|
* {RDF::Microdata::Reader}
|
51
52
|
* {RDF::Microdata::Reader::Nokogiri}
|
52
|
-
* {RDF::Microdata::Reader::REXML}
|
53
53
|
|
54
54
|
### Additional vocabularies
|
55
55
|
|
56
|
-
## TODO
|
57
|
-
* Add support for LibXML and REXML bindings, and use the best available
|
58
|
-
* Consider a SAX-based parser for improved performance
|
59
|
-
|
60
56
|
## Resources
|
61
57
|
* [RDF.rb][RDF.rb]
|
62
58
|
* [Documentation](http://rdf.rubyforge.org/microdata)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0
|
1
|
+
1.1.0
|
data/etc/doap.html
CHANGED
@@ -2,8 +2,9 @@
|
|
2
2
|
<html itemscope itemid="http://rubygems.org/gems/rdf-microdata" itemtype="http://usefulinc.com/ns/doap#Project">
|
3
3
|
<head>
|
4
4
|
<title lang="en" itemprop="shortdesc">Microdata reader for Ruby.</title>
|
5
|
+
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" >
|
5
6
|
</head>
|
6
|
-
<body
|
7
|
+
<body>
|
7
8
|
<p>Project description for <span itemprop="name">RDF::Microdata</span>.</p>
|
8
9
|
<p lang="en" itemprop="description">
|
9
10
|
RDF::Microdata is an Microdata reader for Ruby using the RDF.rb library suite.
|
@@ -13,7 +14,7 @@
|
|
13
14
|
<a itemprop="http://purl.org/dc/terms/creator developer documenter maintainer http://xmlns.com/foaf/0.1/creator" href="http://greggkellogg.net/foaf#me"
|
14
15
|
>Gregg Kellogg</a>
|
15
16
|
</dd>
|
16
|
-
<dt>Created</dt><time itemprop="created" datetime="2011-08-29"
|
17
|
+
<dt>Created</dt><dd><time itemprop="created" datetime="2011-08-29">2011-08-29</time></dd>
|
17
18
|
<dt>Blog</dt><dd><a href="http://greggkellogg.net/" itemprop="blog">http://greggkellogg.net/</a></dd>
|
18
19
|
<dt>Bug DB</dt><dd>
|
19
20
|
<a href="http://github.com/ruby-rdf/rdf-microdata/issues" itemprop="bug-database">
|
data/lib/rdf/microdata/reader.rb
CHANGED
@@ -1,9 +1,4 @@
|
|
1
|
-
|
2
|
-
raise LoadError, "not with java" if RUBY_PLATFORM == "java"
|
3
|
-
require 'nokogiri'
|
4
|
-
rescue LoadError => e
|
5
|
-
:rexml
|
6
|
-
end
|
1
|
+
require 'nokogiri'
|
7
2
|
require 'rdf/xsd'
|
8
3
|
require 'json'
|
9
4
|
|
@@ -204,8 +199,6 @@ module RDF::Microdata
|
|
204
199
|
# the input stream to read
|
205
200
|
# @param [Hash{Symbol => Object}] options
|
206
201
|
# any additional options
|
207
|
-
# @option options [Symbol] :library (:nokogiri)
|
208
|
-
# One of :nokogiri or :rexml. If nil/unspecified uses :nokogiri if available, :rexml otherwise.
|
209
202
|
# @option options [Encoding] :encoding (Encoding::UTF_8)
|
210
203
|
# the encoding of the input stream (Ruby 1.9+)
|
211
204
|
# @option options [Boolean] :validate (false)
|
@@ -231,20 +224,10 @@ module RDF::Microdata
|
|
231
224
|
@debug = options[:debug]
|
232
225
|
@vocab_expansion = options.fetch(:vocab_expansion, true)
|
233
226
|
|
234
|
-
@library =
|
235
|
-
when nil
|
236
|
-
(defined?(::Nokogiri) && RUBY_PLATFORM != 'java') ? :nokogiri : :rexml
|
237
|
-
when :nokogiri, :rexml
|
238
|
-
options[:library]
|
239
|
-
else
|
240
|
-
raise ArgumentError.new("expected :rexml or :nokogiri, but got #{options[:library].inspect}")
|
241
|
-
end
|
227
|
+
@library = :nokogiri
|
242
228
|
|
243
229
|
require "rdf/microdata/reader/#{@library}"
|
244
|
-
@implementation =
|
245
|
-
when :nokogiri then Nokogiri
|
246
|
-
when :rexml then REXML
|
247
|
-
end
|
230
|
+
@implementation = Nokogiri
|
248
231
|
self.extend(@implementation)
|
249
232
|
|
250
233
|
initialize_html(input, options) rescue raise RDF::ReaderError.new($!.message)
|
data/lib/rdf/microdata/vocab.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
module RDF
|
2
2
|
class MD < Vocabulary("http://www.w3.org/ns/md#"); end
|
3
|
-
class Schema < Vocabulary("http://schema.org/"); end
|
4
|
-
class XHV < Vocabulary("http://www.w3.org/1999/xhtml/vocab#"); end
|
5
3
|
class HCard < Vocabulary("http://microformats.org/profile/hcard#"); end
|
6
4
|
class HCalendar < Vocabulary("http://microformats.org/profile/hcalendar"); end
|
7
5
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rdf-microdata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg
|
@@ -9,120 +9,106 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-12-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rdf
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- -
|
18
|
+
- - '>='
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: 1.0
|
20
|
+
version: 1.1.0
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - '>='
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version: 1.0
|
28
|
-
- !ruby/object:Gem::Dependency
|
29
|
-
name: json
|
30
|
-
requirement: !ruby/object:Gem::Requirement
|
31
|
-
requirements:
|
32
|
-
- - ! '>='
|
33
|
-
- !ruby/object:Gem::Version
|
34
|
-
version: 1.7.7
|
35
|
-
type: :runtime
|
36
|
-
prerelease: false
|
37
|
-
version_requirements: !ruby/object:Gem::Requirement
|
38
|
-
requirements:
|
39
|
-
- - ! '>='
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
version: 1.7.7
|
27
|
+
version: 1.1.0
|
42
28
|
- !ruby/object:Gem::Dependency
|
43
29
|
name: rdf-xsd
|
44
30
|
requirement: !ruby/object:Gem::Requirement
|
45
31
|
requirements:
|
46
|
-
- -
|
32
|
+
- - '>='
|
47
33
|
- !ruby/object:Gem::Version
|
48
|
-
version:
|
34
|
+
version: 1.1.0
|
49
35
|
type: :runtime
|
50
36
|
prerelease: false
|
51
37
|
version_requirements: !ruby/object:Gem::Requirement
|
52
38
|
requirements:
|
53
|
-
- -
|
39
|
+
- - '>='
|
54
40
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
41
|
+
version: 1.1.0
|
56
42
|
- !ruby/object:Gem::Dependency
|
57
43
|
name: htmlentities
|
58
44
|
requirement: !ruby/object:Gem::Requirement
|
59
45
|
requirements:
|
60
|
-
- -
|
46
|
+
- - '>='
|
61
47
|
- !ruby/object:Gem::Version
|
62
48
|
version: 4.3.0
|
63
49
|
type: :runtime
|
64
50
|
prerelease: false
|
65
51
|
version_requirements: !ruby/object:Gem::Requirement
|
66
52
|
requirements:
|
67
|
-
- -
|
53
|
+
- - '>='
|
68
54
|
- !ruby/object:Gem::Version
|
69
55
|
version: 4.3.0
|
70
56
|
- !ruby/object:Gem::Dependency
|
71
57
|
name: nokogiri
|
72
58
|
requirement: !ruby/object:Gem::Requirement
|
73
59
|
requirements:
|
74
|
-
- -
|
60
|
+
- - '>='
|
75
61
|
- !ruby/object:Gem::Version
|
76
|
-
version: 1.
|
62
|
+
version: 1.6.0
|
77
63
|
type: :development
|
78
64
|
prerelease: false
|
79
65
|
version_requirements: !ruby/object:Gem::Requirement
|
80
66
|
requirements:
|
81
|
-
- -
|
67
|
+
- - '>='
|
82
68
|
- !ruby/object:Gem::Version
|
83
|
-
version: 1.
|
69
|
+
version: 1.6.0
|
84
70
|
- !ruby/object:Gem::Dependency
|
85
71
|
name: equivalent-xml
|
86
72
|
requirement: !ruby/object:Gem::Requirement
|
87
73
|
requirements:
|
88
|
-
- -
|
74
|
+
- - '>='
|
89
75
|
- !ruby/object:Gem::Version
|
90
76
|
version: 0.3.0
|
91
77
|
type: :development
|
92
78
|
prerelease: false
|
93
79
|
version_requirements: !ruby/object:Gem::Requirement
|
94
80
|
requirements:
|
95
|
-
- -
|
81
|
+
- - '>='
|
96
82
|
- !ruby/object:Gem::Version
|
97
83
|
version: 0.3.0
|
98
84
|
- !ruby/object:Gem::Dependency
|
99
85
|
name: open-uri-cached
|
100
86
|
requirement: !ruby/object:Gem::Requirement
|
101
87
|
requirements:
|
102
|
-
- -
|
88
|
+
- - '>='
|
103
89
|
- !ruby/object:Gem::Version
|
104
90
|
version: 0.0.5
|
105
91
|
type: :development
|
106
92
|
prerelease: false
|
107
93
|
version_requirements: !ruby/object:Gem::Requirement
|
108
94
|
requirements:
|
109
|
-
- -
|
95
|
+
- - '>='
|
110
96
|
- !ruby/object:Gem::Version
|
111
97
|
version: 0.0.5
|
112
98
|
- !ruby/object:Gem::Dependency
|
113
99
|
name: yard
|
114
100
|
requirement: !ruby/object:Gem::Requirement
|
115
101
|
requirements:
|
116
|
-
- -
|
102
|
+
- - '>='
|
117
103
|
- !ruby/object:Gem::Version
|
118
|
-
version: 0.8.
|
104
|
+
version: 0.8.7
|
119
105
|
type: :development
|
120
106
|
prerelease: false
|
121
107
|
version_requirements: !ruby/object:Gem::Requirement
|
122
108
|
requirements:
|
123
|
-
- -
|
109
|
+
- - '>='
|
124
110
|
- !ruby/object:Gem::Version
|
125
|
-
version: 0.8.
|
111
|
+
version: 0.8.7
|
126
112
|
- !ruby/object:Gem::Dependency
|
127
113
|
name: spira
|
128
114
|
requirement: !ruby/object:Gem::Requirement
|
@@ -141,72 +127,72 @@ dependencies:
|
|
141
127
|
name: rspec
|
142
128
|
requirement: !ruby/object:Gem::Requirement
|
143
129
|
requirements:
|
144
|
-
- -
|
130
|
+
- - '>='
|
145
131
|
- !ruby/object:Gem::Version
|
146
132
|
version: 2.14.0
|
147
133
|
type: :development
|
148
134
|
prerelease: false
|
149
135
|
version_requirements: !ruby/object:Gem::Requirement
|
150
136
|
requirements:
|
151
|
-
- -
|
137
|
+
- - '>='
|
152
138
|
- !ruby/object:Gem::Version
|
153
139
|
version: 2.14.0
|
154
140
|
- !ruby/object:Gem::Dependency
|
155
141
|
name: rdf-spec
|
156
142
|
requirement: !ruby/object:Gem::Requirement
|
157
143
|
requirements:
|
158
|
-
- -
|
144
|
+
- - '>='
|
159
145
|
- !ruby/object:Gem::Version
|
160
|
-
version:
|
146
|
+
version: 1.1.0
|
161
147
|
type: :development
|
162
148
|
prerelease: false
|
163
149
|
version_requirements: !ruby/object:Gem::Requirement
|
164
150
|
requirements:
|
165
|
-
- -
|
151
|
+
- - '>='
|
166
152
|
- !ruby/object:Gem::Version
|
167
|
-
version:
|
153
|
+
version: 1.1.0
|
168
154
|
- !ruby/object:Gem::Dependency
|
169
155
|
name: rdf-rdfa
|
170
156
|
requirement: !ruby/object:Gem::Requirement
|
171
157
|
requirements:
|
172
|
-
- -
|
158
|
+
- - '>='
|
173
159
|
- !ruby/object:Gem::Version
|
174
|
-
version:
|
160
|
+
version: 1.1.0
|
175
161
|
type: :development
|
176
162
|
prerelease: false
|
177
163
|
version_requirements: !ruby/object:Gem::Requirement
|
178
164
|
requirements:
|
179
|
-
- -
|
165
|
+
- - '>='
|
180
166
|
- !ruby/object:Gem::Version
|
181
|
-
version:
|
167
|
+
version: 1.1.0
|
182
168
|
- !ruby/object:Gem::Dependency
|
183
169
|
name: rdf-turtle
|
184
170
|
requirement: !ruby/object:Gem::Requirement
|
185
171
|
requirements:
|
186
|
-
- -
|
172
|
+
- - '>='
|
187
173
|
- !ruby/object:Gem::Version
|
188
|
-
version: 1.0
|
174
|
+
version: 1.1.0
|
189
175
|
type: :development
|
190
176
|
prerelease: false
|
191
177
|
version_requirements: !ruby/object:Gem::Requirement
|
192
178
|
requirements:
|
193
|
-
- -
|
179
|
+
- - '>='
|
194
180
|
- !ruby/object:Gem::Version
|
195
|
-
version: 1.0
|
181
|
+
version: 1.1.0
|
196
182
|
- !ruby/object:Gem::Dependency
|
197
183
|
name: rdf-isomorphic
|
198
184
|
requirement: !ruby/object:Gem::Requirement
|
199
185
|
requirements:
|
200
|
-
- -
|
186
|
+
- - '>='
|
201
187
|
- !ruby/object:Gem::Version
|
202
|
-
version:
|
188
|
+
version: 1.1.0
|
203
189
|
type: :development
|
204
190
|
prerelease: false
|
205
191
|
version_requirements: !ruby/object:Gem::Requirement
|
206
192
|
requirements:
|
207
|
-
- -
|
193
|
+
- - '>='
|
208
194
|
- !ruby/object:Gem::Version
|
209
|
-
version:
|
195
|
+
version: 1.1.0
|
210
196
|
description: Microdata reader for Ruby.
|
211
197
|
email: public-rdf-ruby@w3.org
|
212
198
|
executables: []
|
@@ -220,7 +206,6 @@ files:
|
|
220
206
|
- lib/rdf/microdata/expansion.rb
|
221
207
|
- lib/rdf/microdata/format.rb
|
222
208
|
- lib/rdf/microdata/reader/nokogiri.rb
|
223
|
-
- lib/rdf/microdata/reader/rexml.rb
|
224
209
|
- lib/rdf/microdata/reader.rb
|
225
210
|
- lib/rdf/microdata/version.rb
|
226
211
|
- lib/rdf/microdata/vocab.rb
|
@@ -237,17 +222,17 @@ require_paths:
|
|
237
222
|
- lib
|
238
223
|
required_ruby_version: !ruby/object:Gem::Requirement
|
239
224
|
requirements:
|
240
|
-
- -
|
225
|
+
- - '>='
|
241
226
|
- !ruby/object:Gem::Version
|
242
|
-
version: 1.
|
227
|
+
version: 1.9.2
|
243
228
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
244
229
|
requirements:
|
245
|
-
- -
|
230
|
+
- - '>='
|
246
231
|
- !ruby/object:Gem::Version
|
247
232
|
version: '0'
|
248
233
|
requirements: []
|
249
234
|
rubyforge_project: rdf-microdata
|
250
|
-
rubygems_version: 2.
|
235
|
+
rubygems_version: 2.1.11
|
251
236
|
signing_key:
|
252
237
|
specification_version: 4
|
253
238
|
summary: Microdata reader for Ruby.
|
@@ -1,277 +0,0 @@
|
|
1
|
-
require 'htmlentities'
|
2
|
-
|
3
|
-
module RDF::Microdata
|
4
|
-
class Reader < RDF::Reader
|
5
|
-
##
|
6
|
-
# REXML implementation of an HTML parser.
|
7
|
-
#
|
8
|
-
# @see http://www.germane-software.com/software/rexml/
|
9
|
-
module REXML
|
10
|
-
##
|
11
|
-
# Returns the name of the underlying XML library.
|
12
|
-
#
|
13
|
-
# @return [Symbol]
|
14
|
-
def self.library
|
15
|
-
:rexml
|
16
|
-
end
|
17
|
-
|
18
|
-
# Proxy class to implement uniform element accessors
|
19
|
-
class NodeProxy
|
20
|
-
attr_reader :node
|
21
|
-
attr_reader :parent
|
22
|
-
|
23
|
-
def initialize(node, parent = nil)
|
24
|
-
@node = node
|
25
|
-
@parent = parent
|
26
|
-
end
|
27
|
-
|
28
|
-
##
|
29
|
-
# Element language
|
30
|
-
#
|
31
|
-
# From HTML5 3.2.3.3
|
32
|
-
# If both the lang attribute in no namespace and the lang attribute in the XML namespace are set
|
33
|
-
# on an element, user agents must use the lang attribute in the XML namespace, and the lang
|
34
|
-
# attribute in no namespace must be ignored for the purposes of determining the element's
|
35
|
-
# language.
|
36
|
-
#
|
37
|
-
# @return [String]
|
38
|
-
def language
|
39
|
-
language = case
|
40
|
-
when @node.attribute("lang")
|
41
|
-
@node.attribute("lang").to_s
|
42
|
-
else
|
43
|
-
parent && parent.element? && parent.language
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
##
|
48
|
-
# Return xml:base on element, if defined
|
49
|
-
#
|
50
|
-
# @return [String]
|
51
|
-
def base
|
52
|
-
if @base.nil?
|
53
|
-
@base = attributes['xml:base'] ||
|
54
|
-
(parent && parent.element? && parent.base) ||
|
55
|
-
false
|
56
|
-
end
|
57
|
-
|
58
|
-
@base == false ? nil : @base
|
59
|
-
end
|
60
|
-
|
61
|
-
def display_path
|
62
|
-
@display_path ||= begin
|
63
|
-
path = []
|
64
|
-
path << parent.display_path if parent
|
65
|
-
path << @node.name
|
66
|
-
case @node
|
67
|
-
when ::REXML::Element then path.join("/")
|
68
|
-
when ::REXML::Attribute then path.join("@")
|
69
|
-
else path.join("?")
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
##
|
75
|
-
# Return true of all child elements are text
|
76
|
-
#
|
77
|
-
# @return [Array<:text, :element, :attribute>]
|
78
|
-
def text_content?
|
79
|
-
@node.children.all? {|c| c.is_a?(::REXML::Text)}
|
80
|
-
end
|
81
|
-
|
82
|
-
##
|
83
|
-
# Retrieve XMLNS definitions for this element
|
84
|
-
#
|
85
|
-
# @return [Hash{String => String}]
|
86
|
-
def namespaces
|
87
|
-
ns_decls = {}
|
88
|
-
@node.attributes.each do |name, attr|
|
89
|
-
next unless name =~ /^xmlns(?:\:(.+))?/
|
90
|
-
ns_decls[$1] = attr
|
91
|
-
end
|
92
|
-
ns_decls
|
93
|
-
end
|
94
|
-
|
95
|
-
##
|
96
|
-
# Children of this node
|
97
|
-
#
|
98
|
-
# @return [NodeSetProxy]
|
99
|
-
def children
|
100
|
-
NodeSetProxy.new(@node.children, self)
|
101
|
-
end
|
102
|
-
|
103
|
-
##
|
104
|
-
# Elements of this node
|
105
|
-
#
|
106
|
-
# @return [NodeSetProxy]
|
107
|
-
def elements
|
108
|
-
NodeSetProxy.new(@node.children.select {|c| c.is_a?(::REXML::Element)}, self)
|
109
|
-
end
|
110
|
-
|
111
|
-
##
|
112
|
-
# Inner text of an element
|
113
|
-
#
|
114
|
-
# @see http://apidock.com/ruby/REXML/Element/get_text#743-Get-all-inner-texts
|
115
|
-
# @return [String]
|
116
|
-
def inner_text
|
117
|
-
coder = HTMLEntities.new
|
118
|
-
::REXML::XPath.match(@node,'.//text()').map { |e|
|
119
|
-
coder.decode(e)
|
120
|
-
}.join
|
121
|
-
end
|
122
|
-
|
123
|
-
##
|
124
|
-
# Inner text of an element
|
125
|
-
#
|
126
|
-
# @see http://apidock.com/ruby/REXML/Element/get_text#743-Get-all-inner-texts
|
127
|
-
# @return [String]
|
128
|
-
def inner_html
|
129
|
-
@node.children.map(&:to_s).join
|
130
|
-
end
|
131
|
-
|
132
|
-
##
|
133
|
-
# Node type accessors
|
134
|
-
#
|
135
|
-
# @return [Boolean]
|
136
|
-
def element?
|
137
|
-
@node.is_a?(::REXML::Element)
|
138
|
-
end
|
139
|
-
|
140
|
-
def has_attribute?(attr)
|
141
|
-
!!node.attribute(attr)
|
142
|
-
end
|
143
|
-
|
144
|
-
##
|
145
|
-
# Proxy for everything else to @node
|
146
|
-
def method_missing(method, *args)
|
147
|
-
@node.send(method, *args)
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
|
-
##
|
152
|
-
# NodeSet proxy
|
153
|
-
class NodeSetProxy
|
154
|
-
attr_reader :node_set
|
155
|
-
attr_reader :parent
|
156
|
-
|
157
|
-
def initialize(node_set, parent)
|
158
|
-
@node_set = node_set
|
159
|
-
@parent = parent
|
160
|
-
end
|
161
|
-
|
162
|
-
##
|
163
|
-
# Return a proxy for each child
|
164
|
-
#
|
165
|
-
# @yield child
|
166
|
-
# @yieldparam [NodeProxy] child
|
167
|
-
def each
|
168
|
-
@node_set.each do |c|
|
169
|
-
yield NodeProxy.new(c, parent)
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
##
|
174
|
-
# Return proxy for first element and remove it
|
175
|
-
# @return [NodeProxy]
|
176
|
-
def shift
|
177
|
-
(e = node_set.shift) && NodeProxy.new(e, parent)
|
178
|
-
end
|
179
|
-
|
180
|
-
##
|
181
|
-
# Add NodeSetProxys
|
182
|
-
# @param [NodeSetProxy, REXML::Element] other
|
183
|
-
# @return [NodeSetProxy]
|
184
|
-
def +(other)
|
185
|
-
new_ns = node_set.clone
|
186
|
-
other.node_set.each {|n| new_ns << n}
|
187
|
-
NodeSetProxy.new(new_ns, parent)
|
188
|
-
end
|
189
|
-
|
190
|
-
##
|
191
|
-
# Add a NodeProxy
|
192
|
-
# @param [NodeProxy, REXML::Element] elem
|
193
|
-
# @return [NodeSetProxy]
|
194
|
-
def <<(elem)
|
195
|
-
node_set << (elem.is_a?(NodeProxy) ? elem.node : elem)
|
196
|
-
self
|
197
|
-
end
|
198
|
-
|
199
|
-
def inspect
|
200
|
-
@node_set.map {|c| NodeProxy.new(c, parent).display_path}.inspect
|
201
|
-
end
|
202
|
-
|
203
|
-
##
|
204
|
-
# Proxy for everything else to @node_set
|
205
|
-
def method_missing(method, *args)
|
206
|
-
@node_set.send(method, *args)
|
207
|
-
end
|
208
|
-
end
|
209
|
-
|
210
|
-
##
|
211
|
-
# Initializes the underlying XML library.
|
212
|
-
#
|
213
|
-
# @param [Hash{Symbol => Object}] options
|
214
|
-
# @return [void]
|
215
|
-
def initialize_html(input, options = {})
|
216
|
-
require 'rexml/document' unless defined?(::REXML)
|
217
|
-
@doc = case input
|
218
|
-
when ::REXML::Document
|
219
|
-
input
|
220
|
-
else
|
221
|
-
# Try to detect charset from input
|
222
|
-
options[:encoding] ||= input.charset if input.respond_to?(:charset)
|
223
|
-
|
224
|
-
# Otherwise, default is utf-8
|
225
|
-
options[:encoding] ||= 'utf-8'
|
226
|
-
|
227
|
-
# Set xml:base for the document element, if defined
|
228
|
-
@base_uri = base_uri ? base_uri.to_s : nil
|
229
|
-
|
230
|
-
# Only parse as XML, no HTML mode
|
231
|
-
doc = ::REXML::Document.new(input.respond_to?(:read) ? input.read : input.to_s)
|
232
|
-
end
|
233
|
-
end
|
234
|
-
|
235
|
-
# Accessor methods to mask native elements & attributes
|
236
|
-
|
237
|
-
##
|
238
|
-
# Return proxy for document root
|
239
|
-
def root
|
240
|
-
@root ||= NodeProxy.new(@doc.root) if @doc && @doc.root
|
241
|
-
end
|
242
|
-
|
243
|
-
##
|
244
|
-
# Document errors
|
245
|
-
def doc_errors
|
246
|
-
[]
|
247
|
-
end
|
248
|
-
|
249
|
-
##
|
250
|
-
# Find value of document base
|
251
|
-
#
|
252
|
-
# @param [String] base Existing base from URI or :base_uri
|
253
|
-
# @return [String]
|
254
|
-
def doc_base(base)
|
255
|
-
# find if the document has a base element
|
256
|
-
base_el = ::REXML::XPath.first(@doc, "/html/head/base")
|
257
|
-
base = base_el.attribute("href").to_s.split("#").first if base_el
|
258
|
-
|
259
|
-
base || @base_uri
|
260
|
-
end
|
261
|
-
|
262
|
-
##
|
263
|
-
# Based on Microdata element.getItems
|
264
|
-
#
|
265
|
-
# @see http://www.w3.org/TR/2011/WD-microdata-20110525/#top-level-microdata-items
|
266
|
-
def getItems
|
267
|
-
::REXML::XPath.match(@doc, "//[@itemscope]").select {|el| !el.attribute('itemprop')}.map {|n| NodeProxy.new(n)}
|
268
|
-
end
|
269
|
-
|
270
|
-
##
|
271
|
-
# Look up an element in the document by id
|
272
|
-
def find_element_by_id(id)
|
273
|
-
(e = ::REXML::XPath.first(@doc, "//[@id='#{id}']")) && NodeProxy.new(e)
|
274
|
-
end
|
275
|
-
end
|
276
|
-
end
|
277
|
-
end
|