oga 0.2.3-java → 0.3.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/oga.gemspec CHANGED
@@ -32,8 +32,8 @@ Gem::Specification.new do |s|
32
32
  s.has_rdoc = 'yard'
33
33
  s.required_ruby_version = '>= 1.9.3'
34
34
 
35
- s.add_dependency 'racc', ['~> 1.4', '>= 1.4.12']
36
35
  s.add_dependency 'ast'
36
+ s.add_dependency 'ruby-ll', '~> 2.1'
37
37
 
38
38
  s.add_development_dependency 'rake'
39
39
  s.add_development_dependency 'rspec', ['~> 3.0']
metadata CHANGED
@@ -1,47 +1,41 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: oga
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: java
6
6
  authors:
7
7
  - Yorick Peterse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-04 00:00:00.000000000 Z
11
+ date: 2015-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: racc
14
+ name: ast
15
15
  version_requirements: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.4'
20
17
  - - '>='
21
18
  - !ruby/object:Gem::Version
22
- version: 1.4.12
19
+ version: '0'
23
20
  requirement: !ruby/object:Gem::Requirement
24
21
  requirements:
25
- - - ~>
26
- - !ruby/object:Gem::Version
27
- version: '1.4'
28
22
  - - '>='
29
23
  - !ruby/object:Gem::Version
30
- version: 1.4.12
24
+ version: '0'
31
25
  prerelease: false
32
26
  type: :runtime
33
27
  - !ruby/object:Gem::Dependency
34
- name: ast
28
+ name: ruby-ll
35
29
  version_requirements: !ruby/object:Gem::Requirement
36
30
  requirements:
37
- - - '>='
31
+ - - ~>
38
32
  - !ruby/object:Gem::Version
39
- version: '0'
33
+ version: '2.1'
40
34
  requirement: !ruby/object:Gem::Requirement
41
35
  requirements:
42
- - - '>='
36
+ - - ~>
43
37
  - !ruby/object:Gem::Version
44
- version: '0'
38
+ version: '2.1'
45
39
  prerelease: false
46
40
  type: :runtime
47
41
  - !ruby/object:Gem::Dependency
@@ -148,54 +142,56 @@ executables: []
148
142
  extensions: []
149
143
  extra_rdoc_files: []
150
144
  files:
151
- - doc/manually_creating_documents.md
152
- - doc/DCO.md
153
145
  - doc/xml_namespaces.md
146
+ - doc/manually_creating_documents.md
154
147
  - doc/css_selectors.md
155
148
  - doc/migrating_from_nokogiri.md
156
- - doc/changelog.md
149
+ - doc/DCO.md
157
150
  - doc/css/common.css
158
151
  - lib/oga.rb
159
- - lib/oga/oga.rb
160
152
  - lib/oga/version.rb
153
+ - lib/oga/lru.rb
154
+ - lib/oga/oga.rb
161
155
  - lib/oga/css/lexer.rb
162
156
  - lib/oga/css/parser.rb
163
- - lib/oga/html/sax_parser.rb
164
- - lib/oga/html/parser.rb
165
- - lib/oga/xml/lexer.rb
166
157
  - lib/oga/xml/namespace.rb
167
- - lib/oga/xml/processing_instruction.rb
168
- - lib/oga/xml/character_node.rb
169
- - lib/oga/xml/sax_parser.rb
170
- - lib/oga/xml/doctype.rb
171
- - lib/oga/xml/document.rb
172
- - lib/oga/xml/comment.rb
173
- - lib/oga/xml/text.rb
158
+ - lib/oga/xml/lexer.rb
174
159
  - lib/oga/xml/querying.rb
175
- - lib/oga/xml/attribute.rb
176
- - lib/oga/xml/pull_parser.rb
177
160
  - lib/oga/xml/parser.rb
178
- - lib/oga/xml/entities.rb
179
- - lib/oga/xml/html_void_elements.rb
161
+ - lib/oga/xml/traversal.rb
162
+ - lib/oga/xml/text.rb
180
163
  - lib/oga/xml/node.rb
164
+ - lib/oga/xml/document.rb
165
+ - lib/oga/xml/pull_parser.rb
181
166
  - lib/oga/xml/node_set.rb
167
+ - lib/oga/xml/sax_parser.rb
168
+ - lib/oga/xml/cdata.rb
182
169
  - lib/oga/xml/element.rb
170
+ - lib/oga/xml/character_node.rb
171
+ - lib/oga/xml/doctype.rb
172
+ - lib/oga/xml/html_void_elements.rb
173
+ - lib/oga/xml/entities.rb
174
+ - lib/oga/xml/default_namespace.rb
175
+ - lib/oga/xml/attribute.rb
183
176
  - lib/oga/xml/xml_declaration.rb
184
- - lib/oga/xml/cdata.rb
185
- - lib/oga/xml/traversal.rb
177
+ - lib/oga/xml/processing_instruction.rb
178
+ - lib/oga/xml/comment.rb
179
+ - lib/oga/html/parser.rb
180
+ - lib/oga/html/sax_parser.rb
181
+ - lib/oga/html/entities.rb
186
182
  - lib/oga/xpath/lexer.rb
187
- - lib/oga/xpath/evaluator.rb
188
183
  - lib/oga/xpath/parser.rb
189
- - ext/c/lexer.c
190
- - ext/c/lexer.rl
191
- - ext/c/lexer.h
192
- - ext/c/liboga.c
193
- - ext/c/extconf.rb
194
- - ext/c/liboga.h
184
+ - lib/oga/xpath/evaluator.rb
195
185
  - ext/ragel/base_lexer.rl
196
186
  - ext/java/Liboga.java
197
187
  - ext/java/org/liboga/xml/Lexer.java
198
188
  - ext/java/org/liboga/xml/Lexer.rl
189
+ - ext/c/extconf.rb
190
+ - ext/c/lexer.rl
191
+ - ext/c/lexer.h
192
+ - ext/c/liboga.c
193
+ - ext/c/lexer.c
194
+ - ext/c/liboga.h
199
195
  - README.md
200
196
  - LICENSE
201
197
  - oga.gemspec
data/doc/changelog.md DELETED
@@ -1,324 +0,0 @@
1
- # Changelog
2
-
3
- This document contains details of the various releases and their release dates.
4
- Dates are in the format `yyyy-mm-dd`.
5
-
6
- ## 0.2.3 - 2015-03-04
7
-
8
- This release adds support for lexing HTML `<style>` tags similar to how
9
- `<script>` tags are handled. This ensures that the contents of these tags are
10
- treated as-is without any HTML entity conversion being applied.
11
-
12
- See commits 78e40b55c0e5941bee5791a5014260e9c2cf8aad and
13
- 3b2055a30b128aa679a83332dfdfa68314271b24 for more information.
14
-
15
- ## 0.2.2 - 2015-03-03
16
-
17
- This release fixes a bug where setting the text of an element using
18
- `Oga::XML::Element#inner_text=` would not set the parent element of the newly
19
- created text node. This would result in the following:
20
-
21
- some_element.inner_text = 'foo'
22
-
23
- some_element.children[0].parent # => nil
24
-
25
- Here `parent` is supposed to return `some_element` instead. See commit
26
- 142b467277dc9864df8279347ba737ddf60f4836 for more information.
27
-
28
- ## 0.2.1 - 2015-03-02
29
-
30
- ### Proper HTML serializing support for script tags
31
-
32
- When serializing an HTML document back to HTML (as a String) the contents of
33
- `<script>` tags are serialized correctly. Previously XML unsafe characters
34
- (e.g. `<`) would be converted to XML entities, which results in invalid
35
- Javascript syntax. This has been changed so that `<script>` tags in HTML
36
- documents _don't_ have their contents converted, ensuring proper Javascript
37
- syntax upon output.
38
-
39
- See commit 874d7124af540f0bc78e6c586868bbffb4310c5d and issue
40
- <https://github.com/YorickPeterse/oga/issues/79> for more information.
41
-
42
- ### Proper lexing support for script tags
43
-
44
- When lexing HTML documents the XML lexer is now capable of lexing the contents
45
- of `<script>` tags properly. Previously input such as `<script>x >y</script>`
46
- would result in incorrect tokens being emitted. See commit
47
- ba2177e2cfda958ea12c5b04dbf60907aaa8816d and issue
48
- <https://github.com/YorickPeterse/oga/issues/70> for more information.
49
-
50
- ### Element Inner Text
51
-
52
- When setting the inner text of an element using `Oga::XML::Element#inner_text=`
53
- _all_ child nodes of the element are now removed first, instead of only text
54
- nodes being removed.
55
-
56
- See <https://github.com/YorickPeterse/oga/issues/64> for more information.
57
-
58
- ### Support for extra XML entities
59
-
60
- Support for encoding/decoding extra XML entities was added by Dmitry
61
- Krasnoukhov. This includes entities such as `&#60`, `&#34`, etc. See commit
62
- 26baf89440d97bd9dd5e50ec3d6d9b7ab3bdf737 for more information.
63
-
64
- ### Support for inline doctypes with newlines in IO input
65
-
66
- The XML lexer (and thus the parser) can now handle inline doctypes containing
67
- newlines when using an IO object as the input. For example:
68
-
69
- <!DOCTYPE html[foo
70
- bar]>
71
-
72
- Previously this would result in incorrect tokens being emitted by the lexer. See
73
- commit cbb2815146a79805b8da483d2ef48d17e2959e72 for more information.
74
-
75
- ## 0.2.0 - 2014-11-17
76
-
77
- ### CSS Selector Support
78
-
79
- Probably the biggest feature of this release: support for querying documents
80
- using CSS selectors. Oga supports a subset of the CSS3 selector specification,
81
- in particular the following selectors are supported:
82
-
83
- * Element, class and ID selectors
84
- * Attribute selectors (e.g. `foo[x ~= "y"]`)
85
-
86
- The following pseudo classes are supported:
87
-
88
- * `:root`
89
- * `:nth-child(n)`
90
- * `:nth-last-child(n)`
91
- * `:nth-of-type(n)`
92
- * `:nth-last-of-type(n)`
93
- * `:first-child`
94
- * `:last-child`
95
- * `:first-of-type`
96
- * `:last-of-type`
97
- * `:only-child`
98
- * `:only-of-type`
99
- * `:empty`
100
-
101
- You can use CSS selectors using the methods `css` and `at_css` on an instance of
102
- `Oga::XML::Document` or `Oga::XML::Element`. For example:
103
-
104
- document = Oga.parse_xml('<people><person>Alice</person></people>')
105
-
106
- document.css('people person') # => NodeSet(Element(name: "person" ...))
107
-
108
- The architecture behind this is quite similar to parsing XPath. There's a lexer
109
- (`Oga::CSS::Lexer`) and a parser (`Oga::CSS::Parser`). Unlike Nokogiri (and
110
- perhaps other libraries) the parser _does not_ output XPath expressions as a
111
- String or a CSS specific AST. Instead it directly emits an XPath AST. This
112
- allows the resulting AST to be directly evaluated by `Oga::XPath::Evaluator`.
113
-
114
- See <https://github.com/YorickPeterse/oga/issues/11> for more information.
115
-
116
- ### Multi-line Attribute Support
117
-
118
- Oga can now lex/parse elements that have attributes with newlines in them.
119
- Previously this would trigger memory allocation errors.
120
-
121
- See <https://github.com/YorickPeterse/oga/issues/58> for more information.
122
-
123
- ### SAX after_element
124
-
125
- The `after_element` method in the SAX parsing API now always takes two
126
- arguments: the namespace name and element name. Previously this method would
127
- always receive a single nil value as its argument, which is rather pointless.
128
-
129
- See <https://github.com/YorickPeterse/oga/issues/54> for more information.
130
-
131
- ### XPath Grouping
132
-
133
- XPath expressions can now be grouped together using parentheses. This allows one
134
- to specify a custom operator precedence.
135
-
136
- ### Enumerator Parsing Input
137
-
138
- Enumerator instances can now be used as input for `Oga.parse_xml` and friends.
139
- This can be used to download and parse XML files on the fly. For example:
140
-
141
- enum = Enumerator.new do |yielder|
142
- HTTPClient.get('http://some-website.com/some-big-file.xml') do |chunk|
143
- yielder << chunk
144
- end
145
- end
146
-
147
- document = Oga.parse_xml(enum)
148
-
149
- See <https://github.com/YorickPeterse/oga/issues/48> for more information.
150
-
151
- ### Removing Attributes
152
-
153
- Element attributes can now be removed using `Oga::XML::Element#unset`:
154
-
155
- element = Oga::XML::Element.new(:name => 'foo')
156
-
157
- element.set('class', 'foo')
158
- element.unset('class')
159
-
160
- ### XPath Attributes
161
-
162
- XPath predicates are now evaluated for every context node as opposed to being
163
- evaluated once for the entire context. This ensures that expressions such as
164
- `descendant-or-self::node()/foo[1]` are evaluated correctly.
165
-
166
- ### Available Namespaces
167
-
168
- When calling `Oga::XML::Element#available_namespaces` the Hash returned by
169
- `Oga::XML::Element#namespaces` would be modified in place. This was a bug that
170
- has been fixed in this release.
171
-
172
- ### NodeSets
173
-
174
- NodeSet instances can now be compared with each other using `==`. Previously
175
- this would always consider two instances to be different from each other due to
176
- the usage of the default `Object#==` method.
177
-
178
- ### XML Entities
179
-
180
- XML entities such as `&amp;` and `&lt;` are now encoded/decoded by the lexer,
181
- string and text nodes.
182
-
183
- See <https://github.com/YorickPeterse/oga/issues/49> for more information.
184
-
185
- ### General
186
-
187
- Source lines are no longer included in error messages generated by the XML
188
- parser. This simplifies the code and removes the need of re-reading the input
189
- (in case of IO/Enumerable inputs).
190
-
191
- ### XML Lexer Newlines
192
-
193
- Newlines in the XML lexer are now counted in native code (C/Java). On MRI and
194
- JRuby the improvement is quite small, but on Rubinius it's a massive
195
- improvement. See commit `8db77c0a09bf6c996dd2856a6dbe1ad076b1d30a` for more
196
- information.
197
-
198
- ### HTML Void Element Performance
199
-
200
- Performance for detecting HTML void elements (e.g. `<br>` and `<link>`) has been
201
- improved by removing String allocations that were not needed.
202
-
203
- ## 0.1.3 - 2014-09-24
204
-
205
- This release fixes a problem with serializing attributes using the namespace
206
- prefix "xmlns". See <https://github.com/YorickPeterse/oga/issues/47> for more
207
- information.
208
-
209
- ## 0.1.2 - 2014-09-23
210
-
211
- ### SAX API
212
-
213
- A SAX parser/API has been added. This API is useful when even the overhead of
214
- the pull-parser is too much memory-wise. Example:
215
-
216
- class ElementNames
217
- attr_reader :names
218
-
219
- def initialize
220
- @names = []
221
- end
222
-
223
- def on_element(namespace, name, attrs = {})
224
- @names << name
225
- end
226
- end
227
-
228
- handler = ElementNames.new
229
-
230
- Oga.sax_parse_xml(handler, '<foo><bar></bar></foo>')
231
-
232
- handler.names # => ["foo", "bar"]
233
-
234
- ### Racc Gem
235
-
236
- Oga will now always use the Racc gem instead of the version shipped with the
237
- Ruby standard library.
238
-
239
- ### Error Reporting
240
-
241
- XML parser errors have been made a little bit more user friendly, though they
242
- can still be quite cryptic.
243
-
244
- ### Serializing Elements
245
-
246
- Elements serialized to XML/HTML will use self-closing tags whenever possible.
247
- When parsing HTML documents only HTML void elements will use self-closing tags
248
- (e.g. `<link>` tags). Example:
249
-
250
- Oga.parse_xml('<foo></foo>').to_xml # => "<foo />"
251
- Oga.parse_html('<script></script>').to_xml # => "<script></script>"
252
-
253
- ### Default Namespaces
254
-
255
- Namespaces are no longer removed from the attributes list when an element is
256
- created.
257
-
258
- Default XML namespaces can now be registered using `xmlns="..."`. Previously
259
- this would be ignored. Example:
260
-
261
- document = Oga.parse_xml('<root xmlns="baz"></root>')
262
- root = document.children[0]
263
-
264
- root.namespace # => Namespace(name: "xmlns" uri: "baz")
265
-
266
- ### Lexing Incomplete Input
267
-
268
- Oga can now lex input such as `</` without entering an infinite loop. Example:
269
-
270
- Oga.parse_xml('</') # => Document(children: NodeSet(Text("</")))
271
-
272
- ### Absolute XPath Paths
273
-
274
- Oga can now parse and evaluate the XPath expression "/" (that is, just "/").
275
- This will return the root node (usually a Document instance). Example:
276
-
277
- document = Oga.parse_xml('<root></root>')
278
-
279
- document.xpath('/') # => NodeSet(Document(children: NodeSet(Element(name: "root"))))
280
-
281
- ### Namespace Ordering
282
-
283
- Namespaces available to an element are now returned in the correct order.
284
- Previously outer namespaces would take precedence over inner namespaces, instead
285
- of it being the other way around. Example:
286
-
287
- document = Oga.parse_xml <<-EOF
288
- <root xmlns:foo="bar">
289
- <container xmlns:foo="baz">
290
- <foo:text>Text!</foo:text>
291
- </container>
292
- </root>
293
- EOF
294
-
295
- foo = document.at_xpath('root/container/foo:text')
296
-
297
- foo.namespace # => Namespace(name: "foo" uri: "baz")
298
-
299
- ### Parsing Capitalized HTML Void Elements
300
-
301
- Oga is now capable of parsing capitalized HTML void elements (e.g. `<BR>`).
302
- Previously it could only parse lower-cased void elements. Thanks to Tero Tasanen
303
- for fixing this. Example:
304
-
305
- Oga.parse_html('<BR>') # => Document(children: NodeSet(Element(name: "BR")))
306
-
307
- ### Node Type Method Removed
308
-
309
- The `node_type` method has been removed and its purpose has been moved into
310
- the `XML::PullParser` class itself. This method was solely used by the pull
311
- parser to provide shorthands for node classes. As such it doesn't make sense to
312
- expose this to the outside world as a public method.
313
-
314
- ## 0.1.1 - 2014-09-13
315
-
316
- This release fixes a problem where element attributes were not separated by
317
- spaces. Thanks to Jonathan Rochkind for reporting it and Bill Dueber providing
318
- an initial patch for this problem.
319
-
320
- ## 0.1.0 - 2014-09-12
321
-
322
- The first public release of Oga. This release contains support for parsing XML,
323
- basic support for parsing HTML, support for querying documents using XPath and
324
- more.