iudex-html 1.2.b.1-java → 1.2.b.2-java
Sign up to get free protection for your applications and to get access to all the features.
- data/History.rdoc +4 -0
- data/Manifest.txt +1 -1
- data/lib/iudex-html/base.rb +1 -1
- data/lib/iudex-html/{iudex-html-1.2.b.1.jar → iudex-html-1.2.b.2.jar} +0 -0
- data/pom.xml +2 -2
- data/test/test_html_parser.rb +16 -0
- metadata +7 -4
data/History.rdoc
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
=== 1.2.b.2 (2012-6-23)
|
2
|
+
* Fix #8: Add Neko parser check for empty (malformed) attribute names.
|
3
|
+
* Extend/upgrade to gravitext-xmlprod [1.5.1,1.7)
|
4
|
+
|
1
5
|
=== 1.2.b.1 (2012-5-31)
|
2
6
|
* Add support for HTML 5 (draft) tags, attributes
|
3
7
|
* Neko parser support for HTML 5 <meta charset>
|
data/Manifest.txt
CHANGED
data/lib/iudex-html/base.rb
CHANGED
Binary file
|
data/pom.xml
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
<groupId>iudex</groupId>
|
4
4
|
<artifactId>iudex-html</artifactId>
|
5
5
|
<packaging>jar</packaging>
|
6
|
-
<version>1.2.b.1</version>
|
6
|
+
<version>1.2.b.2</version>
|
7
7
|
<name>Iudex HTML parsing/filtering and text extraction</name>
|
8
8
|
|
9
9
|
<parent>
|
@@ -24,7 +24,7 @@
|
|
24
24
|
<dependency>
|
25
25
|
<groupId>com.gravitext</groupId>
|
26
26
|
<artifactId>gravitext-xmlprod</artifactId>
|
27
|
-
<version>[1.5.1,1.5.9999)</version>
|
27
|
+
<version>[1.5.1,1.6.9999)</version>
|
28
28
|
</dependency>
|
29
29
|
|
30
30
|
<dependency>
|
data/test/test_html_parser.rb
CHANGED
@@ -150,4 +150,20 @@ HTML
|
|
150
150
|
assert_fragment( html[ :out ], tree )
|
151
151
|
end
|
152
152
|
|
153
|
+
import 'iudex.html.neko.NekoHTMLParser'
|
154
|
+
|
155
|
+
# Neko yields attributes with empty localName, given this invalid
|
156
|
+
# input (#8)
|
157
|
+
def test_invalid_attribute
|
158
|
+
html = { :in => '<div><img alt=""wns : next class="artwork" /></div>',
|
159
|
+
:out => '<div><img alt="" wns="" next="" class="artwork"/></div>' }
|
160
|
+
|
161
|
+
parser = NekoHTMLParser.new
|
162
|
+
parser.parse_as_fragment = true
|
163
|
+
parser.skip_banned = false # required to reproduce empty localName
|
164
|
+
|
165
|
+
tree = inner( parser.parse( source( html[ :in ], "UTF-8" ) ) )
|
166
|
+
assert_fragment( html[ :out ], tree )
|
167
|
+
end
|
168
|
+
|
153
169
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: iudex-html
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease: 4
|
5
|
-
version: 1.2.b.1
|
5
|
+
version: 1.2.b.2
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- David Kellum
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-06-
|
13
|
+
date: 2012-06-23 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: iudex-core
|
@@ -39,9 +39,12 @@ dependencies:
|
|
39
39
|
version_requirements: &id003 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
|
-
- - ~>
|
42
|
+
- - ">="
|
43
43
|
- !ruby/object:Gem::Version
|
44
44
|
version: 1.5.1
|
45
|
+
- - <
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: "1.7"
|
45
48
|
requirement: *id003
|
46
49
|
prerelease: false
|
47
50
|
type: :runtime
|
@@ -117,7 +120,7 @@ files:
|
|
117
120
|
- test/test_stax_parser.rb
|
118
121
|
- test/test_tree_walker.rb
|
119
122
|
- test/test_word_counters.rb
|
120
|
-
- lib/iudex-html/iudex-html-1.2.b.1.jar
|
123
|
+
- lib/iudex-html/iudex-html-1.2.b.2.jar
|
121
124
|
homepage: http://github.com/dekellum/iudex
|
122
125
|
licenses: []
|
123
126
|
|