iudex-html 1.4.0-java → 1.7.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/History.rdoc +3 -0
- data/Manifest.txt +1 -1
- data/README.rdoc +1 -1
- data/bin/iudex-html-clean +1 -1
- data/bin/iudex-html-perftest +1 -1
- data/build/HTML.java.erb +1 -1
- data/build/attributes +1 -1
- data/build/java_generate.rb +1 -1
- data/build/tags +1 -1
- data/lib/iudex-html.rb +1 -1
- data/lib/iudex-html/base.rb +2 -2
- data/lib/iudex-html/factory_helper.rb +1 -1
- data/lib/iudex-html/iudex-html-1.7.0.jar +0 -0
- data/pom.xml +3 -3
- data/test/html_test_helper.rb +1 -1
- data/test/setup.rb +1 -1
- data/test/test_characters_normalizer.rb +1 -1
- data/test/test_extract_filter.rb +1 -1
- data/test/test_factory_helper.rb +1 -1
- data/test/test_html_parser.rb +1 -1
- data/test/test_other_filters.rb +1 -1
- data/test/test_other_tree_filters.rb +1 -1
- data/test/test_parse_filter.rb +1 -1
- data/test/test_stax_parser.rb +1 -1
- data/test/test_tree_walker.rb +1 -1
- data/test/test_word_counters.rb +1 -1
- metadata +36 -56
- data/lib/iudex-html/iudex-html-1.4.0.jar +0 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 8562423f11f6ec337a56e96f9c0ee172fd789f65
|
|
4
|
+
data.tar.gz: 206f0dbd3bb2055e0b600ff6f607943af79a56fe
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 55f5d8f05329a4f4fd2330c23b8a0f03eb518c71d4f847efa0d7d081cc43969851f37331a1264703b1f8dbb4ba99957623b6843a8987fc8962a1dee0b3c05bb4
|
|
7
|
+
data.tar.gz: f07d067eee916aa6e63d321820c2a76bde6ca141a47dfe8c396f93b890547bf7f893f4d048e9bd034b756419b86e1798b7271b0ed0023aee4272139923edee3d
|
data/History.rdoc
CHANGED
data/Manifest.txt
CHANGED
data/README.rdoc
CHANGED
|
@@ -11,7 +11,7 @@ filtering, exracting text and links.
|
|
|
11
11
|
|
|
12
12
|
== License
|
|
13
13
|
|
|
14
|
-
Copyright (c)
|
|
14
|
+
Copyright (c) 2010-2015 David Kellum
|
|
15
15
|
|
|
16
16
|
Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
17
17
|
may not use this file except in compliance with the License. You
|
data/bin/iudex-html-clean
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env jruby
|
|
2
2
|
# -*- ruby -*-
|
|
3
3
|
#--
|
|
4
|
-
# Copyright (c)
|
|
4
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
5
5
|
#
|
|
6
6
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
7
7
|
# may not use this file except in compliance with the License. You may
|
data/bin/iudex-html-perftest
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env jruby
|
|
2
2
|
# -*- ruby -*-
|
|
3
3
|
#--
|
|
4
|
-
# Copyright (c)
|
|
4
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
5
5
|
#
|
|
6
6
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
7
7
|
# may not use this file except in compliance with the License. You may
|
data/build/HTML.java.erb
CHANGED
data/build/attributes
CHANGED
data/build/java_generate.rb
CHANGED
data/build/tags
CHANGED
data/lib/iudex-html.rb
CHANGED
data/lib/iudex-html/base.rb
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#--
|
|
2
|
-
# Copyright (c)
|
|
2
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
5
5
|
# may not use this file except in compliance with the License. You may
|
|
@@ -16,6 +16,6 @@
|
|
|
16
16
|
|
|
17
17
|
module Iudex
|
|
18
18
|
module HTML
|
|
19
|
-
VERSION = '1.4.0'
|
|
19
|
+
VERSION = '1.7.0'
|
|
20
20
|
end
|
|
21
21
|
end
|
|
Binary file
|
data/pom.xml
CHANGED
|
@@ -3,13 +3,13 @@
|
|
|
3
3
|
<groupId>iudex</groupId>
|
|
4
4
|
<artifactId>iudex-html</artifactId>
|
|
5
5
|
<packaging>jar</packaging>
|
|
6
|
-
<version>1.4.0</version>
|
|
6
|
+
<version>1.7.0</version>
|
|
7
7
|
<name>Iudex HTML parsing/filtering and text extraction</name>
|
|
8
8
|
|
|
9
9
|
<parent>
|
|
10
10
|
<groupId>iudex</groupId>
|
|
11
11
|
<artifactId>iudex-parent</artifactId>
|
|
12
|
-
<version>1.
|
|
12
|
+
<version>1.7.0</version>
|
|
13
13
|
<relativePath>..</relativePath>
|
|
14
14
|
</parent>
|
|
15
15
|
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
<dependency>
|
|
19
19
|
<groupId>iudex</groupId>
|
|
20
20
|
<artifactId>iudex-core</artifactId>
|
|
21
|
-
<version>[1.
|
|
21
|
+
<version>[1.7.0,1.999)</version>
|
|
22
22
|
</dependency>
|
|
23
23
|
|
|
24
24
|
<dependency>
|
data/test/html_test_helper.rb
CHANGED
data/test/setup.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
#.hashdot.profile += jruby-shortlived
|
|
3
3
|
|
|
4
4
|
#--
|
|
5
|
-
# Copyright (c)
|
|
5
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
6
6
|
#
|
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
8
8
|
# may not use this file except in compliance with the License. You
|
data/test/test_extract_filter.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
#.hashdot.profile += jruby-shortlived
|
|
4
4
|
|
|
5
5
|
#--
|
|
6
|
-
# Copyright (c)
|
|
6
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
7
7
|
#
|
|
8
8
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
9
9
|
# may not use this file except in compliance with the License. You
|
data/test/test_factory_helper.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
#.hashdot.profile += jruby-shortlived
|
|
4
4
|
|
|
5
5
|
#--
|
|
6
|
-
# Copyright (c)
|
|
6
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
7
7
|
#
|
|
8
8
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
9
9
|
# may not use this file except in compliance with the License. You
|
data/test/test_html_parser.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
#.hashdot.profile += jruby-shortlived
|
|
4
4
|
|
|
5
5
|
#--
|
|
6
|
-
# Copyright (c)
|
|
6
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
7
7
|
#
|
|
8
8
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
9
9
|
# may not use this file except in compliance with the License. You
|
data/test/test_other_filters.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
#.hashdot.profile += jruby-shortlived
|
|
4
4
|
|
|
5
5
|
#--
|
|
6
|
-
# Copyright (c)
|
|
6
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
7
7
|
#
|
|
8
8
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
9
9
|
# may not use this file except in compliance with the License. You
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
#.hashdot.profile += jruby-shortlived
|
|
4
4
|
|
|
5
5
|
#--
|
|
6
|
-
# Copyright (c)
|
|
6
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
7
7
|
#
|
|
8
8
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
9
9
|
# may not use this file except in compliance with the License. You
|
data/test/test_parse_filter.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
#.hashdot.profile += jruby-shortlived
|
|
3
3
|
|
|
4
4
|
#--
|
|
5
|
-
# Copyright (c)
|
|
5
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
6
6
|
#
|
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
8
8
|
# may not use this file except in compliance with the License. You
|
data/test/test_stax_parser.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
#.hashdot.profile += jruby-shortlived
|
|
4
4
|
|
|
5
5
|
#--
|
|
6
|
-
# Copyright (c)
|
|
6
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
7
7
|
#
|
|
8
8
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
9
9
|
# may not use this file except in compliance with the License. You
|
data/test/test_tree_walker.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
#.hashdot.profile += jruby-shortlived
|
|
3
3
|
|
|
4
4
|
#--
|
|
5
|
-
# Copyright (c)
|
|
5
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
6
6
|
#
|
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
8
8
|
# may not use this file except in compliance with the License. You
|
data/test/test_word_counters.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
#.hashdot.profile += jruby-shortlived
|
|
4
4
|
|
|
5
5
|
#--
|
|
6
|
-
# Copyright (c)
|
|
6
|
+
# Copyright (c) 2010-2015 David Kellum
|
|
7
7
|
#
|
|
8
8
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
9
9
|
# may not use this file except in compliance with the License. You
|
metadata
CHANGED
|
@@ -1,112 +1,99 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: iudex-html
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
|
|
5
|
-
version: 1.4.0
|
|
4
|
+
version: 1.7.0
|
|
6
5
|
platform: java
|
|
7
6
|
authors:
|
|
8
7
|
- David Kellum
|
|
9
8
|
autorequire:
|
|
10
9
|
bindir: bin
|
|
11
10
|
cert_chain: []
|
|
12
|
-
date:
|
|
11
|
+
date: 2015-05-04 00:00:00.000000000 Z
|
|
13
12
|
dependencies:
|
|
14
13
|
- !ruby/object:Gem::Dependency
|
|
15
|
-
name: iudex-core
|
|
16
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
17
|
-
requirements:
|
|
18
|
-
- - ~>
|
|
19
|
-
- !ruby/object:Gem::Version
|
|
20
|
-
version: 1.4.0
|
|
21
|
-
none: false
|
|
22
14
|
requirement: !ruby/object:Gem::Requirement
|
|
23
15
|
requirements:
|
|
24
16
|
- - ~>
|
|
25
17
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: 1.
|
|
27
|
-
|
|
18
|
+
version: '1.7'
|
|
19
|
+
name: iudex-core
|
|
28
20
|
prerelease: false
|
|
29
21
|
type: :runtime
|
|
30
|
-
- !ruby/object:Gem::Dependency
|
|
31
|
-
name: rjack-nekohtml
|
|
32
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
33
23
|
requirements:
|
|
34
24
|
- - ~>
|
|
35
25
|
- !ruby/object:Gem::Version
|
|
36
|
-
version: 1.
|
|
37
|
-
|
|
26
|
+
version: '1.7'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
38
28
|
requirement: !ruby/object:Gem::Requirement
|
|
39
29
|
requirements:
|
|
40
30
|
- - ~>
|
|
41
31
|
- !ruby/object:Gem::Version
|
|
42
32
|
version: 1.9.18
|
|
43
|
-
|
|
33
|
+
name: rjack-nekohtml
|
|
44
34
|
prerelease: false
|
|
45
35
|
type: :runtime
|
|
46
|
-
- !ruby/object:Gem::Dependency
|
|
47
|
-
name: gravitext-xmlprod
|
|
48
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
49
37
|
requirements:
|
|
50
38
|
- - ~>
|
|
51
39
|
- !ruby/object:Gem::Version
|
|
52
|
-
version: 1.
|
|
53
|
-
|
|
40
|
+
version: 1.9.18
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
54
42
|
requirement: !ruby/object:Gem::Requirement
|
|
55
43
|
requirements:
|
|
56
44
|
- - ~>
|
|
57
45
|
- !ruby/object:Gem::Version
|
|
58
46
|
version: 1.7.0
|
|
59
|
-
|
|
47
|
+
name: gravitext-xmlprod
|
|
60
48
|
prerelease: false
|
|
61
49
|
type: :runtime
|
|
62
|
-
- !ruby/object:Gem::Dependency
|
|
63
|
-
name: minitest
|
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
51
|
requirements:
|
|
66
52
|
- - ~>
|
|
67
53
|
- !ruby/object:Gem::Version
|
|
68
|
-
version:
|
|
69
|
-
|
|
54
|
+
version: 1.7.0
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
70
56
|
requirement: !ruby/object:Gem::Requirement
|
|
71
57
|
requirements:
|
|
72
58
|
- - ~>
|
|
73
59
|
- !ruby/object:Gem::Version
|
|
74
60
|
version: 4.7.4
|
|
75
|
-
|
|
61
|
+
name: minitest
|
|
76
62
|
prerelease: false
|
|
77
63
|
type: :development
|
|
78
|
-
- !ruby/object:Gem::Dependency
|
|
79
|
-
name: rjack-logback
|
|
80
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
81
65
|
requirements:
|
|
82
66
|
- - ~>
|
|
83
67
|
- !ruby/object:Gem::Version
|
|
84
|
-
version:
|
|
85
|
-
|
|
68
|
+
version: 4.7.4
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
86
70
|
requirement: !ruby/object:Gem::Requirement
|
|
87
71
|
requirements:
|
|
88
72
|
- - ~>
|
|
89
73
|
- !ruby/object:Gem::Version
|
|
90
74
|
version: '1.5'
|
|
91
|
-
|
|
75
|
+
name: rjack-logback
|
|
92
76
|
prerelease: false
|
|
93
77
|
type: :development
|
|
94
|
-
- !ruby/object:Gem::Dependency
|
|
95
|
-
name: rjack-tarpit
|
|
96
78
|
version_requirements: !ruby/object:Gem::Requirement
|
|
97
79
|
requirements:
|
|
98
80
|
- - ~>
|
|
99
81
|
- !ruby/object:Gem::Version
|
|
100
|
-
version: '
|
|
101
|
-
|
|
82
|
+
version: '1.5'
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
102
84
|
requirement: !ruby/object:Gem::Requirement
|
|
103
85
|
requirements:
|
|
104
86
|
- - ~>
|
|
105
87
|
- !ruby/object:Gem::Version
|
|
106
|
-
version: '2.
|
|
107
|
-
|
|
88
|
+
version: '2.1'
|
|
89
|
+
name: rjack-tarpit
|
|
108
90
|
prerelease: false
|
|
109
91
|
type: :development
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - ~>
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: '2.1'
|
|
110
97
|
description: Iudex is a general purpose web crawler and feed processor in ruby/java. The iudex-html gem contains filters for HTML parsing, filtering, exracting text and links.
|
|
111
98
|
email:
|
|
112
99
|
- dek-oss@gravitext.com
|
|
@@ -122,16 +109,17 @@ files:
|
|
|
122
109
|
- Manifest.txt
|
|
123
110
|
- README.rdoc
|
|
124
111
|
- Rakefile
|
|
125
|
-
- pom.xml
|
|
126
112
|
- bin/iudex-html-clean
|
|
127
113
|
- bin/iudex-html-perftest
|
|
128
114
|
- build/HTML.java.erb
|
|
129
115
|
- build/attributes
|
|
130
116
|
- build/java_generate.rb
|
|
131
117
|
- build/tags
|
|
132
|
-
- lib/iudex-html/base.rb
|
|
133
118
|
- lib/iudex-html.rb
|
|
119
|
+
- lib/iudex-html/base.rb
|
|
134
120
|
- lib/iudex-html/factory_helper.rb
|
|
121
|
+
- lib/iudex-html/iudex-html-1.7.0.jar
|
|
122
|
+
- pom.xml
|
|
135
123
|
- test/html_test_helper.rb
|
|
136
124
|
- test/reddit.xhtml
|
|
137
125
|
- test/setup.rb
|
|
@@ -145,9 +133,10 @@ files:
|
|
|
145
133
|
- test/test_stax_parser.rb
|
|
146
134
|
- test/test_tree_walker.rb
|
|
147
135
|
- test/test_word_counters.rb
|
|
148
|
-
- lib/iudex-html/iudex-html-1.4.0.jar
|
|
149
136
|
homepage: http://iudex.gravitext.com
|
|
150
|
-
licenses:
|
|
137
|
+
licenses:
|
|
138
|
+
- Apache-2.0
|
|
139
|
+
metadata: {}
|
|
151
140
|
post_install_message:
|
|
152
141
|
rdoc_options:
|
|
153
142
|
- --main
|
|
@@ -156,27 +145,18 @@ require_paths:
|
|
|
156
145
|
- lib
|
|
157
146
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
158
147
|
requirements:
|
|
159
|
-
- -
|
|
148
|
+
- - '>='
|
|
160
149
|
- !ruby/object:Gem::Version
|
|
161
|
-
version:
|
|
162
|
-
segments:
|
|
163
|
-
- 0
|
|
164
|
-
hash: 2
|
|
165
|
-
none: false
|
|
150
|
+
version: 1.8.7
|
|
166
151
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
167
152
|
requirements:
|
|
168
|
-
- -
|
|
153
|
+
- - '>='
|
|
169
154
|
- !ruby/object:Gem::Version
|
|
170
155
|
version: '0'
|
|
171
|
-
segments:
|
|
172
|
-
- 0
|
|
173
|
-
hash: 2
|
|
174
|
-
none: false
|
|
175
156
|
requirements: []
|
|
176
157
|
rubyforge_project:
|
|
177
|
-
rubygems_version:
|
|
158
|
+
rubygems_version: 2.4.5
|
|
178
159
|
signing_key:
|
|
179
|
-
specification_version:
|
|
160
|
+
specification_version: 4
|
|
180
161
|
summary: Iudex is a general purpose web crawler and feed processor in ruby/java.
|
|
181
162
|
test_files: []
|
|
182
|
-
...
|
|
Binary file
|