iudex-rome 1.0.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/History.rdoc ADDED
@@ -0,0 +1,2 @@
1
+ === 1.0.0 (2011-04-04)
2
+ * Initial release.
data/Manifest.txt ADDED
@@ -0,0 +1,11 @@
1
+ History.rdoc
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ pom.xml
6
+ lib/iudex-rome/base.rb
7
+ lib/iudex-rome.rb
8
+ test/setup.rb
9
+ test/simple_rss.xml
10
+ test/test_rome.rb
11
+ lib/iudex-rome/iudex-rome-1.0.0.jar
data/README.rdoc ADDED
@@ -0,0 +1,25 @@
1
+ = iudex-rome
2
+
3
+ * http://github.com/dekellum/iudex
4
+
5
+ == Description
6
+
7
+ Iudex is a general purpose web crawler and feed processor in
8
+ ruby/java. The iudex-rome gem is an adaptation of rjack-rome for feed
9
+ parsing in Iudex.
10
+
11
+ == License
12
+
13
+ Copyright (c) 2008-2011 David Kellum
14
+
15
+ Licensed under the Apache License, Version 2.0 (the "License"); you
16
+ may not use this file except in compliance with the License. You
17
+ may obtain a copy of the License at:
18
+
19
+ http://www.apache.org/licenses/LICENSE-2.0
20
+
21
+ Unless required by applicable law or agreed to in writing, software
22
+ distributed under the License is distributed on an "AS IS" BASIS,
23
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
24
+ implied. See the License for the specific language governing
25
+ permissions and limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,40 @@
1
+ # -*- ruby -*-
2
+
3
+ $LOAD_PATH << './lib'
4
+ require 'iudex-rome/base'
5
+
6
+ require 'rubygems'
7
+ gem 'rjack-tarpit', '~> 1.2'
8
+ require 'rjack-tarpit'
9
+
10
+ t = RJack::TarPit.new( 'iudex-rome',
11
+ Iudex::ROME::VERSION,
12
+ :no_assembly, :java_platform )
13
+
14
+ t.specify do |h|
15
+ h.developer( "David Kellum", "dek-oss@gravitext.com" )
16
+ h.extra_deps += [ [ 'iudex-core', '~> 1.0.0' ],
17
+ [ 'rjack-rome', '~> 1.0.0' ] ]
18
+
19
+ h.testlib = :minitest
20
+ h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
21
+ [ 'rjack-logback', '~> 1.0' ] ]
22
+ end
23
+
24
+ file 'Manifest.txt' => "lib/#{t.name}/base.rb"
25
+
26
+ task :check_pom_version do
27
+ t.test_line_match( 'pom.xml', /<version>/, /#{t.version}/ )
28
+ end
29
+ task :check_history_version do
30
+ t.test_line_match( 'History.rdoc', /^==/, / #{t.version} / )
31
+ end
32
+ task :check_history_date do
33
+ t.test_line_match( 'History.rdoc', /^==/, /\([0-9\-]+\)$/ )
34
+ end
35
+
36
+ task :gem => [ :check_pom_version, :check_history_version ]
37
+ task :tag => [ :check_pom_version, :check_history_version, :check_history_date ]
38
+ task :push => [ :check_history_date ]
39
+
40
+ t.define_tasks
@@ -0,0 +1,23 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ module Iudex
18
+ module ROME
19
+ VERSION = '1.0.0'
20
+
21
+ LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
22
+ end
23
+ end
Binary file
data/lib/iudex-rome.rb ADDED
@@ -0,0 +1,30 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'iudex-core'
18
+ require 'rjack-rome'
19
+
20
+ require 'iudex-rome/base'
21
+
22
+ require 'java'
23
+
24
+ module Iudex
25
+ module ROME
26
+ require "#{LIB_DIR}/iudex-rome-#{VERSION}.jar"
27
+
28
+ import 'iudex.rome.RomeFeedParser'
29
+ end
30
+ end
data/pom.xml ADDED
@@ -0,0 +1,56 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
+
4
+ <modelVersion>4.0.0</modelVersion>
5
+ <groupId>iudex</groupId>
6
+ <artifactId>iudex-rome</artifactId>
7
+ <packaging>jar</packaging>
8
+ <version>1.0.0</version>
9
+ <name>Iudex ROME FeedParser</name>
10
+
11
+ <parent>
12
+ <groupId>iudex</groupId>
13
+ <artifactId>iudex-parent</artifactId>
14
+ <version>1.0</version>
15
+ <relativePath>..</relativePath>
16
+ </parent>
17
+
18
+ <repositories>
19
+ <repository>
20
+ <id>maven2-repository.dev.java.net</id>
21
+ <name>Java.net Repository for Maven</name>
22
+ <url>http://download.java.net/maven/2/</url>
23
+ <layout>default</layout>
24
+ </repository>
25
+ </repositories>
26
+
27
+ <dependencies>
28
+
29
+ <dependency>
30
+ <groupId>iudex</groupId>
31
+ <artifactId>iudex-core</artifactId>
32
+ <version>[1.0,1.1)</version>
33
+ </dependency>
34
+
35
+ <dependency>
36
+ <groupId>rome</groupId>
37
+ <artifactId>rome</artifactId>
38
+ <version>1.0</version>
39
+ </dependency>
40
+
41
+ </dependencies>
42
+
43
+ <build>
44
+ <plugins>
45
+ <plugin>
46
+ <!-- Parent settings -->
47
+ <artifactId>maven-compiler-plugin</artifactId>
48
+ </plugin>
49
+ <plugin>
50
+ <!-- Parent settings -->
51
+ <artifactId>maven-source-plugin</artifactId>
52
+ </plugin>
53
+ </plugins>
54
+ </build>
55
+
56
+ </project>
data/test/setup.rb ADDED
@@ -0,0 +1,34 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ #### General test setup: LOAD_PATH, logging, console output ####
18
+
19
+ ldir = File.join( File.dirname( __FILE__ ), "..", "lib" )
20
+ $LOAD_PATH.unshift( ldir ) unless $LOAD_PATH.include?( ldir )
21
+
22
+ require 'rubygems'
23
+ require 'rjack-logback'
24
+ RJack::Logback.config_console( :stderr => true )
25
+
26
+ require 'minitest/unit'
27
+ require 'minitest/autorun'
28
+
29
+ # Make test output logging compatible: no partial lines.
30
+ class TestOut
31
+ def print( *a ); $stdout.puts( *a ); end
32
+ def puts( *a ); $stdout.puts( *a ); end
33
+ end
34
+ MiniTest::Unit.output = TestOut.new
@@ -0,0 +1,22 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <?xml-stylesheet href="/css/rss20.xsl" type="text/xsl"?>
3
+ <rss version="2.0"
4
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
5
+ xmlns:media="http://search.yahoo.com/mrss/" xmlns:atom="http://www.w3.org/2005/Atom">
6
+ <channel>
7
+ <title>Channel Title</title>
8
+ <link>http://iudex.gravitext.com/test/rss.xml</link>
9
+ <description>Channel Description</description>
10
+ <language>en-us</language>
11
+ <ttl>30</ttl>
12
+ <atom:link rel="self" href="http://iudex.gravitext.com/test/rss.xml" type="application/rss+xml"/>
13
+ <item>
14
+ <title>Item Title</title>
15
+ <link>http://iudex.gravitext.com/test/item/1.html?click_track=a79bna7</link>
16
+ <guid isPermaLink="false">>http://iudex.gravitext.com/test/item/1.html</guid>
17
+ <pubDate>Sat, 06 Nov 2010 20:20:00 EDT</pubDate>
18
+ <description><![CDATA[Item Description with <i>HTML</i>]]></description>
19
+ <dc:creator>David Kellum</dc:creator>
20
+ </item>
21
+ </channel>
22
+ </rss>
data/test/test_rome.rb ADDED
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env jruby
2
+ #.hashdot.profile += jruby-shortlived
3
+
4
+ #--
5
+ # Copyright (c) 2008-2011 David Kellum
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
8
+ # may not use this file except in compliance with the License. You
9
+ # may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
16
+ # implied. See the License for the specific language governing
17
+ # permissions and limitations under the License.
18
+ #++
19
+
20
+ require File.join( File.dirname( __FILE__ ), "setup" )
21
+
22
+ require 'iudex-rome'
23
+
24
+ class TestRome < MiniTest::Unit::TestCase
25
+ include Iudex::Core
26
+ include Iudex::ROME
27
+ include Gravitext::HTMap
28
+
29
+ import 'java.nio.ByteBuffer'
30
+ import 'com.gravitext.util.Charsets'
31
+
32
+ UniMap.define_accessors
33
+
34
+ SIMPLE_RSS = File.join( File.dirname( __FILE__ ), 'simple_rss.xml' )
35
+
36
+ def test_parse
37
+ parser = RomeFeedParser.new
38
+ map = UniMap.new
39
+
40
+ rss_bytes = ByteBuffer.wrap( File.read( SIMPLE_RSS ).to_java_bytes )
41
+ source = ContentSource.new( rss_bytes )
42
+ source.default_encoding = Charsets::UTF_8
43
+ map.source = source
44
+
45
+ parser.filter( map )
46
+
47
+ assert_equal( "Channel Title", map.title )
48
+
49
+ assert( item = map.references.first )
50
+
51
+ assert_equal( "Item Title", item.title )
52
+ assert_equal( "http://iudex.gravitext.com/test/item/1.html" +
53
+ "?click_track=a79bna7",
54
+ item.url.to_s )
55
+ assert( item.pub_date )
56
+ assert_equal( item.pub_date, item.ref_pub_date )
57
+ assert( item.summary )
58
+
59
+ end
60
+
61
+ end
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iudex-rome
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 1.0.0
6
+ platform: java
7
+ authors:
8
+ - David Kellum
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-04-04 00:00:00 -07:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: iudex-core
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: 1.0.0
25
+ type: :runtime
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: rjack-rome
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ~>
34
+ - !ruby/object:Gem::Version
35
+ version: 1.0.0
36
+ type: :runtime
37
+ version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: minitest
40
+ prerelease: false
41
+ requirement: &id003 !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.7.1
47
+ - - <
48
+ - !ruby/object:Gem::Version
49
+ version: "2.1"
50
+ type: :development
51
+ version_requirements: *id003
52
+ - !ruby/object:Gem::Dependency
53
+ name: rjack-logback
54
+ prerelease: false
55
+ requirement: &id004 !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ~>
59
+ - !ruby/object:Gem::Version
60
+ version: "1.0"
61
+ type: :development
62
+ version_requirements: *id004
63
+ - !ruby/object:Gem::Dependency
64
+ name: rjack-tarpit
65
+ prerelease: false
66
+ requirement: &id005 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ~>
70
+ - !ruby/object:Gem::Version
71
+ version: 1.3.0
72
+ type: :development
73
+ version_requirements: *id005
74
+ description: |-
75
+ Iudex is a general purpose web crawler and feed processor in
76
+ ruby/java. The iudex-rome gem is an adaptation of rjack-rome for feed
77
+ parsing in Iudex.
78
+ email:
79
+ - dek-oss@gravitext.com
80
+ executables: []
81
+
82
+ extensions: []
83
+
84
+ extra_rdoc_files:
85
+ - Manifest.txt
86
+ - History.rdoc
87
+ - README.rdoc
88
+ files:
89
+ - History.rdoc
90
+ - Manifest.txt
91
+ - README.rdoc
92
+ - Rakefile
93
+ - pom.xml
94
+ - lib/iudex-rome/base.rb
95
+ - lib/iudex-rome.rb
96
+ - test/setup.rb
97
+ - test/simple_rss.xml
98
+ - test/test_rome.rb
99
+ - lib/iudex-rome/iudex-rome-1.0.0.jar
100
+ has_rdoc: true
101
+ homepage: http://github.com/dekellum/iudex
102
+ licenses: []
103
+
104
+ post_install_message:
105
+ rdoc_options:
106
+ - --main
107
+ - README.rdoc
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ none: false
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: "0"
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: "0"
122
+ requirements: []
123
+
124
+ rubyforge_project: iudex-rome
125
+ rubygems_version: 1.5.1
126
+ signing_key:
127
+ specification_version: 3
128
+ summary: Iudex is a general purpose web crawler and feed processor in ruby/java
129
+ test_files:
130
+ - test/test_rome.rb