iudex-rome 1.0.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.rdoc +2 -0
- data/Manifest.txt +11 -0
- data/README.rdoc +25 -0
- data/Rakefile +40 -0
- data/lib/iudex-rome/base.rb +23 -0
- data/lib/iudex-rome/iudex-rome-1.0.0.jar +0 -0
- data/lib/iudex-rome.rb +30 -0
- data/pom.xml +56 -0
- data/test/setup.rb +34 -0
- data/test/simple_rss.xml +22 -0
- data/test/test_rome.rb +61 -0
- metadata +130 -0
data/History.rdoc
ADDED
data/Manifest.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
= iudex-rome
|
2
|
+
|
3
|
+
* http://github.com/dekellum/iudex
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Iudex is a general purpose web crawler and feed processor in
|
8
|
+
ruby/java. The iudex-rome gem is an adaptation of rjack-rome for feed
|
9
|
+
parsing in Iudex.
|
10
|
+
|
11
|
+
== License
|
12
|
+
|
13
|
+
Copyright (c) 2008-2011 David Kellum
|
14
|
+
|
15
|
+
Licensed under the Apache License, Version 2.0 (the "License"); you
|
16
|
+
may not use this file except in compliance with the License. You
|
17
|
+
may obtain a copy of the License at:
|
18
|
+
|
19
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
20
|
+
|
21
|
+
Unless required by applicable law or agreed to in writing, software
|
22
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
23
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
24
|
+
implied. See the License for the specific language governing
|
25
|
+
permissions and limitations under the License.
|
data/Rakefile
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
$LOAD_PATH << './lib'
|
4
|
+
require 'iudex-rome/base'
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
gem 'rjack-tarpit', '~> 1.2'
|
8
|
+
require 'rjack-tarpit'
|
9
|
+
|
10
|
+
t = RJack::TarPit.new( 'iudex-rome',
|
11
|
+
Iudex::ROME::VERSION,
|
12
|
+
:no_assembly, :java_platform )
|
13
|
+
|
14
|
+
t.specify do |h|
|
15
|
+
h.developer( "David Kellum", "dek-oss@gravitext.com" )
|
16
|
+
h.extra_deps += [ [ 'iudex-core', '~> 1.0.0' ],
|
17
|
+
[ 'rjack-rome', '~> 1.0.0' ] ]
|
18
|
+
|
19
|
+
h.testlib = :minitest
|
20
|
+
h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
|
21
|
+
[ 'rjack-logback', '~> 1.0' ] ]
|
22
|
+
end
|
23
|
+
|
24
|
+
file 'Manifest.txt' => "lib/#{t.name}/base.rb"
|
25
|
+
|
26
|
+
task :check_pom_version do
|
27
|
+
t.test_line_match( 'pom.xml', /<version>/, /#{t.version}/ )
|
28
|
+
end
|
29
|
+
task :check_history_version do
|
30
|
+
t.test_line_match( 'History.rdoc', /^==/, / #{t.version} / )
|
31
|
+
end
|
32
|
+
task :check_history_date do
|
33
|
+
t.test_line_match( 'History.rdoc', /^==/, /\([0-9\-]+\)$/ )
|
34
|
+
end
|
35
|
+
|
36
|
+
task :gem => [ :check_pom_version, :check_history_version ]
|
37
|
+
task :tag => [ :check_pom_version, :check_history_version, :check_history_date ]
|
38
|
+
task :push => [ :check_history_date ]
|
39
|
+
|
40
|
+
t.define_tasks
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
module Iudex
|
18
|
+
module ROME
|
19
|
+
VERSION = '1.0.0'
|
20
|
+
|
21
|
+
LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
|
22
|
+
end
|
23
|
+
end
|
Binary file
|
data/lib/iudex-rome.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-core'
|
18
|
+
require 'rjack-rome'
|
19
|
+
|
20
|
+
require 'iudex-rome/base'
|
21
|
+
|
22
|
+
require 'java'
|
23
|
+
|
24
|
+
module Iudex
|
25
|
+
module ROME
|
26
|
+
require "#{LIB_DIR}/iudex-rome-#{VERSION}.jar"
|
27
|
+
|
28
|
+
import 'iudex.rome.RomeFeedParser'
|
29
|
+
end
|
30
|
+
end
|
data/pom.xml
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
3
|
+
|
4
|
+
<modelVersion>4.0.0</modelVersion>
|
5
|
+
<groupId>iudex</groupId>
|
6
|
+
<artifactId>iudex-rome</artifactId>
|
7
|
+
<packaging>jar</packaging>
|
8
|
+
<version>1.0.0</version>
|
9
|
+
<name>Iudex ROME FeedParser</name>
|
10
|
+
|
11
|
+
<parent>
|
12
|
+
<groupId>iudex</groupId>
|
13
|
+
<artifactId>iudex-parent</artifactId>
|
14
|
+
<version>1.0</version>
|
15
|
+
<relativePath>..</relativePath>
|
16
|
+
</parent>
|
17
|
+
|
18
|
+
<repositories>
|
19
|
+
<repository>
|
20
|
+
<id>maven2-repository.dev.java.net</id>
|
21
|
+
<name>Java.net Repository for Maven</name>
|
22
|
+
<url>http://download.java.net/maven/2/</url>
|
23
|
+
<layout>default</layout>
|
24
|
+
</repository>
|
25
|
+
</repositories>
|
26
|
+
|
27
|
+
<dependencies>
|
28
|
+
|
29
|
+
<dependency>
|
30
|
+
<groupId>iudex</groupId>
|
31
|
+
<artifactId>iudex-core</artifactId>
|
32
|
+
<version>[1.0,1.1)</version>
|
33
|
+
</dependency>
|
34
|
+
|
35
|
+
<dependency>
|
36
|
+
<groupId>rome</groupId>
|
37
|
+
<artifactId>rome</artifactId>
|
38
|
+
<version>1.0</version>
|
39
|
+
</dependency>
|
40
|
+
|
41
|
+
</dependencies>
|
42
|
+
|
43
|
+
<build>
|
44
|
+
<plugins>
|
45
|
+
<plugin>
|
46
|
+
<!-- Parent settings -->
|
47
|
+
<artifactId>maven-compiler-plugin</artifactId>
|
48
|
+
</plugin>
|
49
|
+
<plugin>
|
50
|
+
<!-- Parent settings -->
|
51
|
+
<artifactId>maven-source-plugin</artifactId>
|
52
|
+
</plugin>
|
53
|
+
</plugins>
|
54
|
+
</build>
|
55
|
+
|
56
|
+
</project>
|
data/test/setup.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
#### General test setup: LOAD_PATH, logging, console output ####
|
18
|
+
|
19
|
+
ldir = File.join( File.dirname( __FILE__ ), "..", "lib" )
|
20
|
+
$LOAD_PATH.unshift( ldir ) unless $LOAD_PATH.include?( ldir )
|
21
|
+
|
22
|
+
require 'rubygems'
|
23
|
+
require 'rjack-logback'
|
24
|
+
RJack::Logback.config_console( :stderr => true )
|
25
|
+
|
26
|
+
require 'minitest/unit'
|
27
|
+
require 'minitest/autorun'
|
28
|
+
|
29
|
+
# Make test output logging compatible: no partial lines.
|
30
|
+
class TestOut
|
31
|
+
def print( *a ); $stdout.puts( *a ); end
|
32
|
+
def puts( *a ); $stdout.puts( *a ); end
|
33
|
+
end
|
34
|
+
MiniTest::Unit.output = TestOut.new
|
data/test/simple_rss.xml
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<?xml-stylesheet href="/css/rss20.xsl" type="text/xsl"?>
|
3
|
+
<rss version="2.0"
|
4
|
+
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
5
|
+
xmlns:media="http://search.yahoo.com/mrss/" xmlns:atom="http://www.w3.org/2005/Atom">
|
6
|
+
<channel>
|
7
|
+
<title>Channel Title</title>
|
8
|
+
<link>http://iudex.gravitext.com/test/rss.xml</link>
|
9
|
+
<description>Channel Description</description>
|
10
|
+
<language>en-us</language>
|
11
|
+
<ttl>30</ttl>
|
12
|
+
<atom:link rel="self" href="http://iudex.gravitext.com/test/rss.xml" type="application/rss+xml"/>
|
13
|
+
<item>
|
14
|
+
<title>Item Title</title>
|
15
|
+
<link>http://iudex.gravitext.com/test/item/1.html?click_track=a79bna7</link>
|
16
|
+
<guid isPermaLink="false">>http://iudex.gravitext.com/test/item/1.html</guid>
|
17
|
+
<pubDate>Sat, 06 Nov 2010 20:20:00 EDT</pubDate>
|
18
|
+
<description><![CDATA[Item Description with <i>HTML</i>]]></description>
|
19
|
+
<dc:creator>David Kellum</dc:creator>
|
20
|
+
</item>
|
21
|
+
</channel>
|
22
|
+
</rss>
|
data/test/test_rome.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
require 'iudex-rome'
|
23
|
+
|
24
|
+
class TestRome < MiniTest::Unit::TestCase
|
25
|
+
include Iudex::Core
|
26
|
+
include Iudex::ROME
|
27
|
+
include Gravitext::HTMap
|
28
|
+
|
29
|
+
import 'java.nio.ByteBuffer'
|
30
|
+
import 'com.gravitext.util.Charsets'
|
31
|
+
|
32
|
+
UniMap.define_accessors
|
33
|
+
|
34
|
+
SIMPLE_RSS = File.join( File.dirname( __FILE__ ), 'simple_rss.xml' )
|
35
|
+
|
36
|
+
def test_parse
|
37
|
+
parser = RomeFeedParser.new
|
38
|
+
map = UniMap.new
|
39
|
+
|
40
|
+
rss_bytes = ByteBuffer.wrap( File.read( SIMPLE_RSS ).to_java_bytes )
|
41
|
+
source = ContentSource.new( rss_bytes )
|
42
|
+
source.default_encoding = Charsets::UTF_8
|
43
|
+
map.source = source
|
44
|
+
|
45
|
+
parser.filter( map )
|
46
|
+
|
47
|
+
assert_equal( "Channel Title", map.title )
|
48
|
+
|
49
|
+
assert( item = map.references.first )
|
50
|
+
|
51
|
+
assert_equal( "Item Title", item.title )
|
52
|
+
assert_equal( "http://iudex.gravitext.com/test/item/1.html" +
|
53
|
+
"?click_track=a79bna7",
|
54
|
+
item.url.to_s )
|
55
|
+
assert( item.pub_date )
|
56
|
+
assert_equal( item.pub_date, item.ref_pub_date )
|
57
|
+
assert( item.summary )
|
58
|
+
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
metadata
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: iudex-rome
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 1.0.0
|
6
|
+
platform: java
|
7
|
+
authors:
|
8
|
+
- David Kellum
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-04-04 00:00:00 -07:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: iudex-core
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 1.0.0
|
25
|
+
type: :runtime
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rjack-rome
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ~>
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: 1.0.0
|
36
|
+
type: :runtime
|
37
|
+
version_requirements: *id002
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
name: minitest
|
40
|
+
prerelease: false
|
41
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 1.7.1
|
47
|
+
- - <
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: "2.1"
|
50
|
+
type: :development
|
51
|
+
version_requirements: *id003
|
52
|
+
- !ruby/object:Gem::Dependency
|
53
|
+
name: rjack-logback
|
54
|
+
prerelease: false
|
55
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ~>
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "1.0"
|
61
|
+
type: :development
|
62
|
+
version_requirements: *id004
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: rjack-tarpit
|
65
|
+
prerelease: false
|
66
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ~>
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: 1.3.0
|
72
|
+
type: :development
|
73
|
+
version_requirements: *id005
|
74
|
+
description: |-
|
75
|
+
Iudex is a general purpose web crawler and feed processor in
|
76
|
+
ruby/java. The iudex-rome gem is an adaptation of rjack-rome for feed
|
77
|
+
parsing in Iudex.
|
78
|
+
email:
|
79
|
+
- dek-oss@gravitext.com
|
80
|
+
executables: []
|
81
|
+
|
82
|
+
extensions: []
|
83
|
+
|
84
|
+
extra_rdoc_files:
|
85
|
+
- Manifest.txt
|
86
|
+
- History.rdoc
|
87
|
+
- README.rdoc
|
88
|
+
files:
|
89
|
+
- History.rdoc
|
90
|
+
- Manifest.txt
|
91
|
+
- README.rdoc
|
92
|
+
- Rakefile
|
93
|
+
- pom.xml
|
94
|
+
- lib/iudex-rome/base.rb
|
95
|
+
- lib/iudex-rome.rb
|
96
|
+
- test/setup.rb
|
97
|
+
- test/simple_rss.xml
|
98
|
+
- test/test_rome.rb
|
99
|
+
- lib/iudex-rome/iudex-rome-1.0.0.jar
|
100
|
+
has_rdoc: true
|
101
|
+
homepage: http://github.com/dekellum/iudex
|
102
|
+
licenses: []
|
103
|
+
|
104
|
+
post_install_message:
|
105
|
+
rdoc_options:
|
106
|
+
- --main
|
107
|
+
- README.rdoc
|
108
|
+
require_paths:
|
109
|
+
- lib
|
110
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
111
|
+
none: false
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: "0"
|
116
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
117
|
+
none: false
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: "0"
|
122
|
+
requirements: []
|
123
|
+
|
124
|
+
rubyforge_project: iudex-rome
|
125
|
+
rubygems_version: 1.5.1
|
126
|
+
signing_key:
|
127
|
+
specification_version: 3
|
128
|
+
summary: Iudex is a general purpose web crawler and feed processor in ruby/java
|
129
|
+
test_files:
|
130
|
+
- test/test_rome.rb
|