iudex-rome 1.0.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.rdoc ADDED
@@ -0,0 +1,2 @@
1
+ === 1.0.0 (2011-04-04)
2
+ * Initial release.
data/Manifest.txt ADDED
@@ -0,0 +1,11 @@
1
+ History.rdoc
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ pom.xml
6
+ lib/iudex-rome/base.rb
7
+ lib/iudex-rome.rb
8
+ test/setup.rb
9
+ test/simple_rss.xml
10
+ test/test_rome.rb
11
+ lib/iudex-rome/iudex-rome-1.0.0.jar
data/README.rdoc ADDED
@@ -0,0 +1,25 @@
1
+ = iudex-rome
2
+
3
+ * http://github.com/dekellum/iudex
4
+
5
+ == Description
6
+
7
+ Iudex is a general purpose web crawler and feed processor in
8
+ ruby/java. The iudex-rome gem is an adaptation of rjack-rome for feed
9
+ parsing in Iudex.
10
+
11
+ == License
12
+
13
+ Copyright (c) 2008-2011 David Kellum
14
+
15
+ Licensed under the Apache License, Version 2.0 (the "License"); you
16
+ may not use this file except in compliance with the License. You
17
+ may obtain a copy of the License at:
18
+
19
+ http://www.apache.org/licenses/LICENSE-2.0
20
+
21
+ Unless required by applicable law or agreed to in writing, software
22
+ distributed under the License is distributed on an "AS IS" BASIS,
23
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
24
+ implied. See the License for the specific language governing
25
+ permissions and limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,40 @@
1
+ # -*- ruby -*-
2
+
3
+ $LOAD_PATH << './lib'
4
+ require 'iudex-rome/base'
5
+
6
+ require 'rubygems'
7
+ gem 'rjack-tarpit', '~> 1.2'
8
+ require 'rjack-tarpit'
9
+
10
+ t = RJack::TarPit.new( 'iudex-rome',
11
+ Iudex::ROME::VERSION,
12
+ :no_assembly, :java_platform )
13
+
14
+ t.specify do |h|
15
+ h.developer( "David Kellum", "dek-oss@gravitext.com" )
16
+ h.extra_deps += [ [ 'iudex-core', '~> 1.0.0' ],
17
+ [ 'rjack-rome', '~> 1.0.0' ] ]
18
+
19
+ h.testlib = :minitest
20
+ h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
21
+ [ 'rjack-logback', '~> 1.0' ] ]
22
+ end
23
+
24
+ file 'Manifest.txt' => "lib/#{t.name}/base.rb"
25
+
26
+ task :check_pom_version do
27
+ t.test_line_match( 'pom.xml', /<version>/, /#{t.version}/ )
28
+ end
29
+ task :check_history_version do
30
+ t.test_line_match( 'History.rdoc', /^==/, / #{t.version} / )
31
+ end
32
+ task :check_history_date do
33
+ t.test_line_match( 'History.rdoc', /^==/, /\([0-9\-]+\)$/ )
34
+ end
35
+
36
+ task :gem => [ :check_pom_version, :check_history_version ]
37
+ task :tag => [ :check_pom_version, :check_history_version, :check_history_date ]
38
+ task :push => [ :check_history_date ]
39
+
40
+ t.define_tasks
@@ -0,0 +1,23 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ module Iudex
18
+ module ROME
19
+ VERSION = '1.0.0'
20
+
21
+ LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
22
+ end
23
+ end
Binary file
data/lib/iudex-rome.rb ADDED
@@ -0,0 +1,30 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'iudex-core'
18
+ require 'rjack-rome'
19
+
20
+ require 'iudex-rome/base'
21
+
22
+ require 'java'
23
+
24
+ module Iudex
25
+ module ROME
26
+ require "#{LIB_DIR}/iudex-rome-#{VERSION}.jar"
27
+
28
+ import 'iudex.rome.RomeFeedParser'
29
+ end
30
+ end
data/pom.xml ADDED
@@ -0,0 +1,56 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
+
4
+ <modelVersion>4.0.0</modelVersion>
5
+ <groupId>iudex</groupId>
6
+ <artifactId>iudex-rome</artifactId>
7
+ <packaging>jar</packaging>
8
+ <version>1.0.0</version>
9
+ <name>Iudex ROME FeedParser</name>
10
+
11
+ <parent>
12
+ <groupId>iudex</groupId>
13
+ <artifactId>iudex-parent</artifactId>
14
+ <version>1.0</version>
15
+ <relativePath>..</relativePath>
16
+ </parent>
17
+
18
+ <repositories>
19
+ <repository>
20
+ <id>maven2-repository.dev.java.net</id>
21
+ <name>Java.net Repository for Maven</name>
22
+ <url>http://download.java.net/maven/2/</url>
23
+ <layout>default</layout>
24
+ </repository>
25
+ </repositories>
26
+
27
+ <dependencies>
28
+
29
+ <dependency>
30
+ <groupId>iudex</groupId>
31
+ <artifactId>iudex-core</artifactId>
32
+ <version>[1.0,1.1)</version>
33
+ </dependency>
34
+
35
+ <dependency>
36
+ <groupId>rome</groupId>
37
+ <artifactId>rome</artifactId>
38
+ <version>1.0</version>
39
+ </dependency>
40
+
41
+ </dependencies>
42
+
43
+ <build>
44
+ <plugins>
45
+ <plugin>
46
+ <!-- Parent settings -->
47
+ <artifactId>maven-compiler-plugin</artifactId>
48
+ </plugin>
49
+ <plugin>
50
+ <!-- Parent settings -->
51
+ <artifactId>maven-source-plugin</artifactId>
52
+ </plugin>
53
+ </plugins>
54
+ </build>
55
+
56
+ </project>
data/test/setup.rb ADDED
@@ -0,0 +1,34 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ #### General test setup: LOAD_PATH, logging, console output ####
18
+
19
+ ldir = File.join( File.dirname( __FILE__ ), "..", "lib" )
20
+ $LOAD_PATH.unshift( ldir ) unless $LOAD_PATH.include?( ldir )
21
+
22
+ require 'rubygems'
23
+ require 'rjack-logback'
24
+ RJack::Logback.config_console( :stderr => true )
25
+
26
+ require 'minitest/unit'
27
+ require 'minitest/autorun'
28
+
29
+ # Make test output logging compatible: no partial lines.
30
+ class TestOut
31
+ def print( *a ); $stdout.puts( *a ); end
32
+ def puts( *a ); $stdout.puts( *a ); end
33
+ end
34
+ MiniTest::Unit.output = TestOut.new
@@ -0,0 +1,22 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <?xml-stylesheet href="/css/rss20.xsl" type="text/xsl"?>
3
+ <rss version="2.0"
4
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
5
+ xmlns:media="http://search.yahoo.com/mrss/" xmlns:atom="http://www.w3.org/2005/Atom">
6
+ <channel>
7
+ <title>Channel Title</title>
8
+ <link>http://iudex.gravitext.com/test/rss.xml</link>
9
+ <description>Channel Description</description>
10
+ <language>en-us</language>
11
+ <ttl>30</ttl>
12
+ <atom:link rel="self" href="http://iudex.gravitext.com/test/rss.xml" type="application/rss+xml"/>
13
+ <item>
14
+ <title>Item Title</title>
15
+ <link>http://iudex.gravitext.com/test/item/1.html?click_track=a79bna7</link>
16
+ <guid isPermaLink="false">http://iudex.gravitext.com/test/item/1.html</guid>
17
+ <pubDate>Sat, 06 Nov 2010 20:20:00 EDT</pubDate>
18
+ <description><![CDATA[Item Description with <i>HTML</i>]]></description>
19
+ <dc:creator>David Kellum</dc:creator>
20
+ </item>
21
+ </channel>
22
+ </rss>
data/test/test_rome.rb ADDED
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env jruby
2
+ #.hashdot.profile += jruby-shortlived
3
+
4
+ #--
5
+ # Copyright (c) 2008-2011 David Kellum
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
8
+ # may not use this file except in compliance with the License. You
9
+ # may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
16
+ # implied. See the License for the specific language governing
17
+ # permissions and limitations under the License.
18
+ #++
19
+
20
+ require File.join( File.dirname( __FILE__ ), "setup" )
21
+
22
+ require 'iudex-rome'
23
+
24
+ class TestRome < MiniTest::Unit::TestCase
25
+ include Iudex::Core
26
+ include Iudex::ROME
27
+ include Gravitext::HTMap
28
+
29
+ import 'java.nio.ByteBuffer'
30
+ import 'com.gravitext.util.Charsets'
31
+
32
+ UniMap.define_accessors
33
+
34
+ SIMPLE_RSS = File.join( File.dirname( __FILE__ ), 'simple_rss.xml' )
35
+
36
+ def test_parse
37
+ parser = RomeFeedParser.new
38
+ map = UniMap.new
39
+
40
+ rss_bytes = ByteBuffer.wrap( File.read( SIMPLE_RSS ).to_java_bytes )
41
+ source = ContentSource.new( rss_bytes )
42
+ source.default_encoding = Charsets::UTF_8
43
+ map.source = source
44
+
45
+ parser.filter( map )
46
+
47
+ assert_equal( "Channel Title", map.title )
48
+
49
+ assert( item = map.references.first )
50
+
51
+ assert_equal( "Item Title", item.title )
52
+ assert_equal( "http://iudex.gravitext.com/test/item/1.html" +
53
+ "?click_track=a79bna7",
54
+ item.url.to_s )
55
+ assert( item.pub_date )
56
+ assert_equal( item.pub_date, item.ref_pub_date )
57
+ assert( item.summary )
58
+
59
+ end
60
+
61
+ end
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iudex-rome
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 1.0.0
6
+ platform: java
7
+ authors:
8
+ - David Kellum
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-04-04 00:00:00 -07:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: iudex-core
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: 1.0.0
25
+ type: :runtime
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: rjack-rome
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ~>
34
+ - !ruby/object:Gem::Version
35
+ version: 1.0.0
36
+ type: :runtime
37
+ version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: minitest
40
+ prerelease: false
41
+ requirement: &id003 !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.7.1
47
+ - - <
48
+ - !ruby/object:Gem::Version
49
+ version: "2.1"
50
+ type: :development
51
+ version_requirements: *id003
52
+ - !ruby/object:Gem::Dependency
53
+ name: rjack-logback
54
+ prerelease: false
55
+ requirement: &id004 !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ~>
59
+ - !ruby/object:Gem::Version
60
+ version: "1.0"
61
+ type: :development
62
+ version_requirements: *id004
63
+ - !ruby/object:Gem::Dependency
64
+ name: rjack-tarpit
65
+ prerelease: false
66
+ requirement: &id005 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ~>
70
+ - !ruby/object:Gem::Version
71
+ version: 1.3.0
72
+ type: :development
73
+ version_requirements: *id005
74
+ description: |-
75
+ Iudex is a general purpose web crawler and feed processor in
76
+ ruby/java. The iudex-rome gem is an adaptation of rjack-rome for feed
77
+ parsing in Iudex.
78
+ email:
79
+ - dek-oss@gravitext.com
80
+ executables: []
81
+
82
+ extensions: []
83
+
84
+ extra_rdoc_files:
85
+ - Manifest.txt
86
+ - History.rdoc
87
+ - README.rdoc
88
+ files:
89
+ - History.rdoc
90
+ - Manifest.txt
91
+ - README.rdoc
92
+ - Rakefile
93
+ - pom.xml
94
+ - lib/iudex-rome/base.rb
95
+ - lib/iudex-rome.rb
96
+ - test/setup.rb
97
+ - test/simple_rss.xml
98
+ - test/test_rome.rb
99
+ - lib/iudex-rome/iudex-rome-1.0.0.jar
100
+ has_rdoc: true
101
+ homepage: http://github.com/dekellum/iudex
102
+ licenses: []
103
+
104
+ post_install_message:
105
+ rdoc_options:
106
+ - --main
107
+ - README.rdoc
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ none: false
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: "0"
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: "0"
122
+ requirements: []
123
+
124
+ rubyforge_project: iudex-rome
125
+ rubygems_version: 1.5.1
126
+ signing_key:
127
+ specification_version: 3
128
+ summary: Iudex is a general purpose web crawler and feed processor in ruby/java
129
+ test_files:
130
+ - test/test_rome.rb