jakal 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/License.txt +22 -0
- data/README.rdoc +28 -0
- data/features/calais.feature +38 -0
- data/features/http.feature +32 -0
- data/features/mocks/bbc_story.html +2863 -0
- data/features/mocks/calais.json +2464 -0
- data/features/mocks/topix_rss.xml +47 -0
- data/features/mocks/twitter.json +11 -0
- data/features/processing.feature +16 -0
- data/features/sanitize-text.feature +53 -0
- data/features/step_definitions/calais_steps.rb +44 -0
- data/features/step_definitions/http_steps.rb +56 -0
- data/features/step_definitions/processing_steps.rb +30 -0
- data/features/step_definitions/require_steps.rb +12 -0
- data/features/step_definitions/sanitize-text_steps.rb +32 -0
- data/features/step_definitions/twitter_steps.rb +17 -0
- data/features/support/env.rb +10 -0
- data/lib/jkl.rb +48 -0
- data/lib/jkl/calais_client.rb +64 -0
- data/lib/jkl/rest_client.rb +36 -0
- data/lib/jkl/rss_client.rb +15 -0
- data/lib/jkl/url_doc_handler.rb +31 -0
- metadata +78 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'hpricot'
|
3
|
+
|
4
|
+
module Jkl

  # Sends +post_args+ as an HTTP form POST to +uri+.
  #
  # uri       - a URI object, or a String that URI.parse accepts.
  # post_args - Hash of form fields handed to Net::HTTP.post_form.
  #
  # Returns the response body, or nil when the URI is invalid or the
  # connection fails (a "WARN: ..." line is printed to stdout instead).
  def self.post_to(uri, post_args)
    # post_form returns a single response object; the body has to be read
    # from it explicitly.
    Net::HTTP.post_form(coerce_uri(uri), post_args).body
  rescue URI::InvalidURIError => e
    puts("WARN: Invalid URI: #{e}")
  rescue SocketError => e
    puts("WARN: Could not connect: #{e}")
  rescue Errno::ECONNREFUSED => e
    puts("WARN: Connection refused: #{e}")
  end

  # Performs an HTTP GET on +uri+.
  #
  # uri - a URI object, or a String that URI.parse accepts.
  #
  # Returns the response body, or nil when the URI is invalid or the
  # connection fails (a "WARN: ..." line is printed to stdout instead).
  def self.get_from(uri)
    Net::HTTP.get_response(coerce_uri(uri)).body
  rescue URI::InvalidURIError => e
    puts("WARN: Invalid URI: #{e}")
  rescue SocketError => e
    puts("WARN: Could not connect: #{e}")
  rescue Errno::ECONNREFUSED => e
    puts("WARN: Connection refused: #{e}")
  end

  # Fetches +uri+ with get_from and hands the body to Hpricot.XML.
  def self.get_from_as_xml(uri)
    Hpricot.XML(get_from(uri))
  end

  # Returns +uri+ unchanged when it already is a URI object, otherwise
  # parses it. Raises URI::InvalidURIError for strings URI.parse rejects.
  def self.coerce_uri(uri)
    uri.is_a?(URI::Generic) ? uri : URI.parse(uri)
  end
  private_class_method :coerce_uri

end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'hpricot'
|
2
|
+
|
3
|
+
module Jkl

  # Gathers the results of searching +rssdoc+ for :item.
  #
  # rssdoc - a parsed document that responds to `/` (presumably an Hpricot
  #          document, given this file's require), or nil.
  #
  # Returns an Array of the matches; empty when rssdoc is nil.
  def self.get_items_from(rssdoc)
    return [] if rssdoc.nil?

    (rssdoc / :item).to_a
  end

  # Looks up +name+ inside +item+ and returns the inner_html of the match.
  #
  # item - an element that responds to `/`.
  # name - the selector passed to `/`.
  def self.attribute_from(item, name)
    (item / name).inner_html
  end

end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'hpricot'
|
2
|
+
require 'rest_client'
|
3
|
+
|
4
|
+
module Jkl

  # Reduces a chunk of HTML to its longer runs of plain text.
  #
  # text - anything responding to to_s (nil yields an empty result).
  #
  # Steps:
  # 1. drop <script> elements together with their contents;
  # 2. drop every remaining tag;
  # 3. trim a little leading whitespace from each line;
  # 4. keep only lines containing at least five spaces (shorter ones are
  #    usually just navigation).
  #
  # Returns the surviving lines concatenated with no separator.
  def self.sanitize(text)
    # Lazy, multiline match so that script bodies containing ">" (for
    # example "a > b") or newlines are removed as a whole.
    without_scripts = text.to_s.gsub(%r{<\s*script\b[^>]*>.*?<\s*/\s*script\s*>}im, "")
    without_tags = without_scripts.gsub(/<\/?[^>]*>/, "")

    kept = []
    without_tags.split("\r").each do |chunk|
      # Removes one leading space/tab per line, then one further leading
      # whitespace character per line.
      trimmed = chunk.gsub(/^[ \t]/, "").gsub(/^[ \s]/, "")
      trimmed.split("\n").each do |line|
        kept << line unless line.count(" ") < 5
      end
    end
    kept.join
  end

  # Parses +response+ with Hpricot.
  #
  # Returns whatever Hpricot returns, or nil when one of the rescued
  # errors is raised (a "WARN: ..." line is printed to stdout instead).
  def self.from_doc(response)
    Hpricot(response)
  rescue URI::InvalidURIError => e
    puts("WARN: Problem with getting a connection: #{e}")
  rescue SocketError => e
    puts("WARN: Could not connect to feed: #{e}")
  rescue Errno::ECONNREFUSED => e
    puts("WARN: Connection refused: #{e}")
  end

end
|
metadata
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jakal
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.7
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- sshingler
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-08-27 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Jakal is a Ruby library for dealing with information overload.
|
17
|
+
email: "'shingler@gmail.com'"
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.rdoc
|
24
|
+
- License.txt
|
25
|
+
files:
|
26
|
+
- lib/jkl.rb
|
27
|
+
- lib/jkl/calais_client.rb
|
28
|
+
- lib/jkl/rest_client.rb
|
29
|
+
- lib/jkl/rss_client.rb
|
30
|
+
- lib/jkl/url_doc_handler.rb
|
31
|
+
- features/calais.feature
|
32
|
+
- features/http.feature
|
33
|
+
- features/processing.feature
|
34
|
+
- features/sanitize-text.feature
|
35
|
+
- features/mocks/bbc_story.html
|
36
|
+
- features/mocks/calais.json
|
37
|
+
- features/mocks/topix_rss.xml
|
38
|
+
- features/mocks/twitter.json
|
39
|
+
- features/step_definitions/calais_steps.rb
|
40
|
+
- features/step_definitions/http_steps.rb
|
41
|
+
- features/step_definitions/processing_steps.rb
|
42
|
+
- features/step_definitions/require_steps.rb
|
43
|
+
- features/step_definitions/sanitize-text_steps.rb
|
44
|
+
- features/step_definitions/twitter_steps.rb
|
45
|
+
- features/support/env.rb
|
46
|
+
- README.rdoc
|
47
|
+
- License.txt
|
48
|
+
has_rdoc: true
|
49
|
+
homepage: http://github.com/sshingler/jkl
|
50
|
+
licenses: []
|
51
|
+
|
52
|
+
post_install_message:
|
53
|
+
rdoc_options:
|
54
|
+
- --inline-source
|
55
|
+
- --charset=UTF-8
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: "0"
|
63
|
+
version:
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: "0"
|
69
|
+
version:
|
70
|
+
requirements: []
|
71
|
+
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 1.3.5
|
74
|
+
signing_key:
|
75
|
+
specification_version: 3
|
76
|
+
summary: Jakal is a Ruby library for dealing with information overload.
|
77
|
+
test_files: []
|
78
|
+
|