jakal 0.1.95 → 0.1.96
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/jkl/rest_client.rb +12 -6
- data/lib/jkl/text_client.rb +4 -4
- data/test/unit/jkl_test.rb +5 -3
- data/test/unit/text_cleaning_test.rb +1 -1
- metadata +10 -4
data/lib/jkl/rest_client.rb
CHANGED
@@ -10,11 +10,11 @@ module Jkl
|
|
10
10
|
resp, data = Net::HTTP.post_form(uri, post_args)
|
11
11
|
data
|
12
12
|
rescue URI::InvalidURIError => e
|
13
|
-
puts("WARN: Invalid URI: #{e}")
|
13
|
+
puts("WARN: JKL Invalid URI: #{e}")
|
14
14
|
rescue SocketError => e
|
15
|
-
puts("WARN: Could not connect: #{e}")
|
15
|
+
puts("WARN: JKL Could not connect: #{e}")
|
16
16
|
rescue Errno::ECONNREFUSED => e
|
17
|
-
puts("WARN: Connection refused: #{e}")
|
17
|
+
puts("WARN: JKL Connection refused: #{e}")
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
@@ -23,13 +23,19 @@ module Jkl
|
|
23
23
|
response = Net::HTTP.get_response(URI.parse(uri))
|
24
24
|
response.body
|
25
25
|
rescue URI::InvalidURIError => e
|
26
|
-
puts("WARN: Invalid URI: #{e}")
|
26
|
+
puts("WARN: JKL Invalid URI: #{e}")
|
27
27
|
rescue SocketError => e
|
28
|
-
puts("WARN: Could not connect: #{e}")
|
28
|
+
puts("WARN: JKL Could not connect: #{e}")
|
29
29
|
rescue Errno::ECONNREFUSED => e
|
30
|
-
puts("WARN: Connection refused: #{e}")
|
30
|
+
puts("WARN: JKL Connection refused: #{e}")
|
31
31
|
end
|
32
32
|
end
|
33
|
+
|
34
|
+
def get_from_over_https(host, path)
|
35
|
+
http = Net::HTTP.new(host, "443")
|
36
|
+
http.use_ssl = true
|
37
|
+
http.get2(path) # returns [status, data]
|
38
|
+
end
|
33
39
|
|
34
40
|
def get_xml_from(uri)
|
35
41
|
Hpricot.XML(get_from(uri))
|
data/lib/jkl/text_client.rb
CHANGED
@@ -2,8 +2,8 @@ module Jkl
|
|
2
2
|
module Text
|
3
3
|
class << self
|
4
4
|
|
5
|
-
def sanitize(text,
|
6
|
-
remove_short_lines(strip_all_tags(remove_script_tags(text)),
|
5
|
+
def sanitize(text, words_on_line = 5)
|
6
|
+
remove_short_lines(strip_all_tags(remove_script_tags(text)), words_on_line)
|
7
7
|
end
|
8
8
|
alias :clean :sanitize
|
9
9
|
|
@@ -24,12 +24,12 @@ module Jkl
|
|
24
24
|
text.gsub(/((<[\s\/]*script\b[^>]*>)([^>]*)(<\/script>))/i, "")
|
25
25
|
end
|
26
26
|
|
27
|
-
def remove_short_lines(text,
|
27
|
+
def remove_short_lines(text, words_on_line = 5)
|
28
28
|
text = text.gsub(/\s\s/, "\n")
|
29
29
|
str = ""
|
30
30
|
# remove short lines - ususally just navigation
|
31
31
|
text.split("\n").each do |l|
|
32
|
-
str << l unless l.count(" ") <
|
32
|
+
str << l unless l.count(" ") < words_on_line
|
33
33
|
end
|
34
34
|
str
|
35
35
|
end
|
data/test/unit/jkl_test.rb
CHANGED
@@ -2,10 +2,10 @@ require "test/unit"
|
|
2
2
|
require "shoulda"
|
3
3
|
require "webmock/test_unit"
|
4
4
|
require "yaml"
|
5
|
-
|
5
|
+
require_relative "../../lib/jkl"
|
6
6
|
|
7
7
|
class JklTest < Test::Unit::TestCase
|
8
|
-
include WebMock
|
8
|
+
include WebMock::API
|
9
9
|
|
10
10
|
context "Using Jkl" do
|
11
11
|
setup do
|
@@ -33,7 +33,9 @@ class JklTest < Test::Unit::TestCase
|
|
33
33
|
end
|
34
34
|
|
35
35
|
should "extract tags from some text" do
|
36
|
-
|
36
|
+
keys = "config/keys.yml"
|
37
|
+
raise "READ:::::::: You need to create #{keys} and put your calais credentials in it." unless File.exist?(keys)
|
38
|
+
key = YAML::load_file(keys)['calais']
|
37
39
|
text = <<-EOF
|
38
40
|
Barack Obama said today that he expects there
|
39
41
|
to be conflict within his new security team after
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 95
|
9
|
-
version: 0.1.95
|
8
|
+
- 96
|
9
|
+
version: 0.1.96
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- sshingler
|
@@ -14,13 +14,14 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-04-13 00:00:00 +
|
17
|
+
date: 2010-04-13 00:00:00 +00:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: hpricot
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
24
25
|
requirements:
|
25
26
|
- - ">="
|
26
27
|
- !ruby/object:Gem::Version
|
@@ -35,6 +36,7 @@ dependencies:
|
|
35
36
|
name: json
|
36
37
|
prerelease: false
|
37
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
38
40
|
requirements:
|
39
41
|
- - ">="
|
40
42
|
- !ruby/object:Gem::Version
|
@@ -49,6 +51,7 @@ dependencies:
|
|
49
51
|
name: rest-client
|
50
52
|
prerelease: false
|
51
53
|
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
52
55
|
requirements:
|
53
56
|
- - ">="
|
54
57
|
- !ruby/object:Gem::Version
|
@@ -63,6 +66,7 @@ dependencies:
|
|
63
66
|
name: calais
|
64
67
|
prerelease: false
|
65
68
|
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
66
70
|
requirements:
|
67
71
|
- - ">="
|
68
72
|
- !ruby/object:Gem::Version
|
@@ -106,6 +110,7 @@ rdoc_options:
|
|
106
110
|
require_paths:
|
107
111
|
- lib
|
108
112
|
required_ruby_version: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
109
114
|
requirements:
|
110
115
|
- - ">="
|
111
116
|
- !ruby/object:Gem::Version
|
@@ -113,6 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
113
118
|
- 0
|
114
119
|
version: "0"
|
115
120
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
116
122
|
requirements:
|
117
123
|
- - ">="
|
118
124
|
- !ruby/object:Gem::Version
|
@@ -122,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
128
|
requirements: []
|
123
129
|
|
124
130
|
rubyforge_project:
|
125
|
-
rubygems_version: 1.3.
|
131
|
+
rubygems_version: 1.3.7
|
126
132
|
signing_key:
|
127
133
|
specification_version: 2
|
128
134
|
summary: Jakal is a Ruby library which contains some utilities for tagging content, cleaning text from web pages and working with RSS feeds.
|