iudex-html 1.2.b.1-java → 1.2.b.2-java

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/History.rdoc CHANGED
@@ -1,3 +1,7 @@
1
+ === 1.2.b.2 (2012-6-23)
2
+ * Fix #8: Add Neko parser check for empty (malformed) attribute names.
3
+ * Extend/upgrade to gravitext-xmlprod [1.5.1,1.7)
4
+
1
5
  === 1.2.b.1 (2012-5-31)
2
6
  * Add support for HTML 5 (draft) tags, attributes
3
7
  * Neko parser support for HTML 5 <meta charset>
data/Manifest.txt CHANGED
@@ -25,4 +25,4 @@ test/test_parse_filter.rb
25
25
  test/test_stax_parser.rb
26
26
  test/test_tree_walker.rb
27
27
  test/test_word_counters.rb
28
- lib/iudex-html/iudex-html-1.2.b.1.jar
28
+ lib/iudex-html/iudex-html-1.2.b.2.jar
@@ -16,6 +16,6 @@
16
16
 
17
17
  module Iudex
18
18
  module HTML
19
- VERSION = '1.2.b.1'
19
+ VERSION = '1.2.b.2'
20
20
  end
21
21
  end
data/pom.xml CHANGED
@@ -3,7 +3,7 @@
3
3
  <groupId>iudex</groupId>
4
4
  <artifactId>iudex-html</artifactId>
5
5
  <packaging>jar</packaging>
6
- <version>1.2.b.1</version>
6
+ <version>1.2.b.2</version>
7
7
  <name>Iudex HTML parsing/filtering and text extraction</name>
8
8
 
9
9
  <parent>
@@ -24,7 +24,7 @@
24
24
  <dependency>
25
25
  <groupId>com.gravitext</groupId>
26
26
  <artifactId>gravitext-xmlprod</artifactId>
27
- <version>[1.5.1,1.5.9999)</version>
27
+ <version>[1.5.1,1.6.9999)</version>
28
28
  </dependency>
29
29
 
30
30
  <dependency>
@@ -150,4 +150,20 @@ HTML
150
150
  assert_fragment( html[ :out ], tree )
151
151
  end
152
152
 
153
+ import 'iudex.html.neko.NekoHTMLParser'
154
+
155
+ # Neko yields attributes with empty localName, given this invalid
156
+ # input (#8)
157
+ def test_invalid_attribute
158
+ html = { :in => '<div><img alt=""wns : next class="artwork" /></div>',
159
+ :out => '<div><img alt="" wns="" next="" class="artwork"/></div>' }
160
+
161
+ parser = NekoHTMLParser.new
162
+ parser.parse_as_fragment = true
163
+ parser.skip_banned = false # required to reproduce empty localName
164
+
165
+ tree = inner( parser.parse( source( html[ :in ], "UTF-8" ) ) )
166
+ assert_fragment( html[ :out ], tree )
167
+ end
168
+
153
169
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: iudex-html
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease: 4
5
- version: 1.2.b.1
5
+ version: 1.2.b.2
6
6
  platform: java
7
7
  authors:
8
8
  - David Kellum
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-06-01 00:00:00 Z
13
+ date: 2012-06-23 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: iudex-core
@@ -39,9 +39,12 @@ dependencies:
39
39
  version_requirements: &id003 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
- - - ~>
42
+ - - ">="
43
43
  - !ruby/object:Gem::Version
44
44
  version: 1.5.1
45
+ - - <
46
+ - !ruby/object:Gem::Version
47
+ version: "1.7"
45
48
  requirement: *id003
46
49
  prerelease: false
47
50
  type: :runtime
@@ -117,7 +120,7 @@ files:
117
120
  - test/test_stax_parser.rb
118
121
  - test/test_tree_walker.rb
119
122
  - test/test_word_counters.rb
120
- - lib/iudex-html/iudex-html-1.2.b.1.jar
123
+ - lib/iudex-html/iudex-html-1.2.b.2.jar
121
124
  homepage: http://github.com/dekellum/iudex
122
125
  licenses: []
123
126