jakal 0.1.95 → 0.1.96
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/jkl/rest_client.rb +12 -6
- data/lib/jkl/text_client.rb +4 -4
- data/test/unit/jkl_test.rb +5 -3
- data/test/unit/text_cleaning_test.rb +1 -1
- metadata +10 -4
data/lib/jkl/rest_client.rb
CHANGED
@@ -10,11 +10,11 @@ module Jkl
|
|
10
10
|
resp, data = Net::HTTP.post_form(uri, post_args)
|
11
11
|
data
|
12
12
|
rescue URI::InvalidURIError => e
|
13
|
-
puts("WARN: Invalid URI: #{e}")
|
13
|
+
puts("WARN: JKL Invalid URI: #{e}")
|
14
14
|
rescue SocketError => e
|
15
|
-
puts("WARN: Could not connect: #{e}")
|
15
|
+
puts("WARN: JKL Could not connect: #{e}")
|
16
16
|
rescue Errno::ECONNREFUSED => e
|
17
|
-
puts("WARN: Connection refused: #{e}")
|
17
|
+
puts("WARN: JKL Connection refused: #{e}")
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
@@ -23,13 +23,19 @@ module Jkl
|
|
23
23
|
response = Net::HTTP.get_response(URI.parse(uri))
|
24
24
|
response.body
|
25
25
|
rescue URI::InvalidURIError => e
|
26
|
-
puts("WARN: Invalid URI: #{e}")
|
26
|
+
puts("WARN: JKL Invalid URI: #{e}")
|
27
27
|
rescue SocketError => e
|
28
|
-
puts("WARN: Could not connect: #{e}")
|
28
|
+
puts("WARN: JKL Could not connect: #{e}")
|
29
29
|
rescue Errno::ECONNREFUSED => e
|
30
|
-
puts("WARN: Connection refused: #{e}")
|
30
|
+
puts("WARN: JKL Connection refused: #{e}")
|
31
31
|
end
|
32
32
|
end
|
33
|
+
|
34
|
+
def get_from_over_https(host, path)
|
35
|
+
http = Net::HTTP.new(host, "443")
|
36
|
+
http.use_ssl = true
|
37
|
+
http.get2(path) # returns [status, data]
|
38
|
+
end
|
33
39
|
|
34
40
|
def get_xml_from(uri)
|
35
41
|
Hpricot.XML(get_from(uri))
|
data/lib/jkl/text_client.rb
CHANGED
@@ -2,8 +2,8 @@ module Jkl
|
|
2
2
|
module Text
|
3
3
|
class << self
|
4
4
|
|
5
|
-
def sanitize(text,
|
6
|
-
remove_short_lines(strip_all_tags(remove_script_tags(text)),
|
5
|
+
def sanitize(text, words_on_line = 5)
|
6
|
+
remove_short_lines(strip_all_tags(remove_script_tags(text)), words_on_line)
|
7
7
|
end
|
8
8
|
alias :clean :sanitize
|
9
9
|
|
@@ -24,12 +24,12 @@ module Jkl
|
|
24
24
|
text.gsub(/((<[\s\/]*script\b[^>]*>)([^>]*)(<\/script>))/i, "")
|
25
25
|
end
|
26
26
|
|
27
|
-
def remove_short_lines(text,
|
27
|
+
def remove_short_lines(text, words_on_line = 5)
|
28
28
|
text = text.gsub(/\s\s/, "\n")
|
29
29
|
str = ""
|
30
30
|
# remove short lines - usually just navigation
|
31
31
|
text.split("\n").each do |l|
|
32
|
-
str << l unless l.count(" ") <
|
32
|
+
str << l unless l.count(" ") < words_on_line
|
33
33
|
end
|
34
34
|
str
|
35
35
|
end
|
data/test/unit/jkl_test.rb
CHANGED
@@ -2,10 +2,10 @@ require "test/unit"
|
|
2
2
|
require "shoulda"
|
3
3
|
require "webmock/test_unit"
|
4
4
|
require "yaml"
|
5
|
-
|
5
|
+
require_relative "../../lib/jkl"
|
6
6
|
|
7
7
|
class JklTest < Test::Unit::TestCase
|
8
|
-
include WebMock
|
8
|
+
include WebMock::API
|
9
9
|
|
10
10
|
context "Using Jkl" do
|
11
11
|
setup do
|
@@ -33,7 +33,9 @@ class JklTest < Test::Unit::TestCase
|
|
33
33
|
end
|
34
34
|
|
35
35
|
should "extract tags from some text" do
|
36
|
-
|
36
|
+
keys = "config/keys.yml"
|
37
|
+
raise "READ:::::::: You need to create #{keys} and put your calais credentials in it." unless File.exist?(keys)
|
38
|
+
key = YAML::load_file(keys)['calais']
|
37
39
|
text = <<-EOF
|
38
40
|
Barack Obama said today that he expects there
|
39
41
|
to be conflict within his new security team after
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 95
|
9
|
-
version: 0.1.95
|
8
|
+
- 96
|
9
|
+
version: 0.1.96
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- sshingler
|
@@ -14,13 +14,14 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-04-13 00:00:00 +
|
17
|
+
date: 2010-04-13 00:00:00 +00:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: hpricot
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
24
25
|
requirements:
|
25
26
|
- - ">="
|
26
27
|
- !ruby/object:Gem::Version
|
@@ -35,6 +36,7 @@ dependencies:
|
|
35
36
|
name: json
|
36
37
|
prerelease: false
|
37
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
38
40
|
requirements:
|
39
41
|
- - ">="
|
40
42
|
- !ruby/object:Gem::Version
|
@@ -49,6 +51,7 @@ dependencies:
|
|
49
51
|
name: rest-client
|
50
52
|
prerelease: false
|
51
53
|
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
52
55
|
requirements:
|
53
56
|
- - ">="
|
54
57
|
- !ruby/object:Gem::Version
|
@@ -63,6 +66,7 @@ dependencies:
|
|
63
66
|
name: calais
|
64
67
|
prerelease: false
|
65
68
|
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
66
70
|
requirements:
|
67
71
|
- - ">="
|
68
72
|
- !ruby/object:Gem::Version
|
@@ -106,6 +110,7 @@ rdoc_options:
|
|
106
110
|
require_paths:
|
107
111
|
- lib
|
108
112
|
required_ruby_version: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
109
114
|
requirements:
|
110
115
|
- - ">="
|
111
116
|
- !ruby/object:Gem::Version
|
@@ -113,6 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
113
118
|
- 0
|
114
119
|
version: "0"
|
115
120
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
116
122
|
requirements:
|
117
123
|
- - ">="
|
118
124
|
- !ruby/object:Gem::Version
|
@@ -122,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
128
|
requirements: []
|
123
129
|
|
124
130
|
rubyforge_project:
|
125
|
-
rubygems_version: 1.3.
|
131
|
+
rubygems_version: 1.3.7
|
126
132
|
signing_key:
|
127
133
|
specification_version: 2
|
128
134
|
summary: Jakal is a Ruby library which contains some utilities for tagging content, cleaning text from web pages and working with RSS feeds.
|