nayutaya-bookmark-utility 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,9 @@
|
|
1
|
+
|
2
|
+
module BookmarkUtility
|
3
|
+
CleanseTitleTable = {
|
4
|
+
"mainichi.jp" => [%r'\A(.+) - 毎日jp\(毎日新聞\)\Z', '\1'].freeze,
|
5
|
+
"sankei.jp.msn.com" => [%r'\A(.+) - MSN産経ニュース\Z', '\1'].freeze,
|
6
|
+
"www.asahi.com" => [%r'\Aasahi\.com(朝日新聞社):(.+)\Z', '\1'].freeze,
|
7
|
+
"www.yomiuri.co.jp" => [%r'\A(.+) : YOMIURI ONLINE(読売新聞)\Z', '\1'].freeze,
|
8
|
+
}.freeze
|
9
|
+
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
|
2
|
+
require "uri"
|
2
3
|
require File.join(File.dirname(__FILE__), "canonical_table")
|
3
4
|
require File.join(File.dirname(__FILE__), "filter_table")
|
5
|
+
require File.join(File.dirname(__FILE__), "cleanse_title_table")
|
4
6
|
|
5
7
|
module BookmarkUtility
|
6
8
|
def self.get_canonical_url(url)
|
@@ -14,4 +16,14 @@ module BookmarkUtility
|
|
14
16
|
# Reports whether +url+ is matched by at least one pattern in FilterTable.
#
# Returns true on the first matching pattern, false when none match.
def self.reject?(url)
  FilterTable.each do |pattern|
    return true if pattern =~ url
  end
  return false
end
|
19
|
+
|
20
|
+
# Removes site-specific boilerplate from a page title.
#
# The host of +url+ selects a [pattern, replacement] pair from
# CleanseTitleTable; when one exists it is applied to +title+ with
# String#gsub.
#
# url   - String URL of the bookmarked page. Errors raised by URI.parse
#         for a malformed URL propagate to the caller.
# title - String title of the page.
#
# Returns the cleansed title, or +title+ unchanged when the URL has no
# host or no rule is registered for it.
def self.cleanse_title(url, title)
  # URI.parse keeps the host exactly as written, but host names are
  # case-insensitive and the table keys are lower-case, so normalise the
  # case before the lookup. A missing host becomes "", which has no rule.
  host = URI.parse(url).host.to_s.downcase
  pattern, replace = CleanseTitleTable[host]
  return title unless pattern
  return title.gsub(pattern, replace)
end
|
17
29
|
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
|
2
2
|
module BookmarkUtility
|
3
3
|
FilterTable = [
|
4
|
+
%r'\Ahttp://cambodiawatch\.net/', # 理由: title要素に記事名が含まれていないため
|
4
5
|
%r'\Ahttp://car\.nikkei\.co\.jp/news/', # 理由: title要素に記事名が含まれていないため
|
5
6
|
%r'\Ahttp://car\.nikkei\.co\.jp/release/', # 理由: title要素に記事名が含まれていないため
|
6
7
|
%r'\Ahttp://cargo-news\.co\.jp/', # 理由: title要素に記事名が含まれていないため
|
@@ -27,6 +28,7 @@ module BookmarkUtility
|
|
27
28
|
%r'\Ahttp://www\.nikkei\.co\.jp/news/', # 理由: title要素に記事名が含まれていないため
|
28
29
|
%r'\Ahttp://www\.pjnews\.net/', # 理由: title要素に記事名が含まれていないため
|
29
30
|
%r'\Ahttp://www\.portfolio\.nl/', # 理由: title要素に記事名が含まれていないため
|
31
|
+
%r'\Ahttp://www\.sanpo-pub\.co\.jp/column/', # 理由: title要素に記事名が含まれていないため
|
30
32
|
%r'\Ahttp://www\.shonai-nippo\.co\.jp/', # 理由: title要素に記事名が含まれていないため
|
31
33
|
%r'\Ahttp://www\.tohkaishimpo\.com/', # 理由: title要素に記事名が含まれていないため
|
32
34
|
%r'\Ahttp://www\.toyama\.hokkoku\.co\.jp/subpage/', # 理由: title要素に記事名が含まれていないため
|
@@ -5,8 +5,8 @@ Gem::Specification.new do |s|
|
|
5
5
|
s.required_ruby_version = Gem::Requirement.new(">= 1.8.6")
|
6
6
|
|
7
7
|
s.name = "nayutaya-bookmark-utility"
|
8
|
-
s.version = "0.1.0"
|
9
|
-
s.date = "
|
8
|
+
s.version = "0.2.0"
|
9
|
+
s.date = "2010-01-07"
|
10
10
|
|
11
11
|
s.authors = ["Yuya Kato"]
|
12
12
|
s.email = "yuyakato@gmail.com"
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
|
|
21
21
|
|
22
22
|
s.files = [
|
23
23
|
"lib/bookmark_utility/canonical_table.rb",
|
24
|
+
"lib/bookmark_utility/cleanse_title_table.rb",
|
24
25
|
"lib/bookmark_utility/core.rb",
|
25
26
|
"lib/bookmark_utility/filter_table.rb",
|
26
27
|
"lib/bookmark_utility/version.rb",
|
@@ -30,11 +31,13 @@ Gem::Specification.new do |s|
|
|
30
31
|
"Rakefile",
|
31
32
|
"test/alltests.rb",
|
32
33
|
"test/canonical_test.rb",
|
34
|
+
"test/cleanse_title_test.rb",
|
33
35
|
"test/filter_test.rb",
|
34
36
|
]
|
35
37
|
s.test_files = [
|
36
38
|
"test/alltests.rb",
|
37
39
|
"test/canonical_test.rb",
|
40
|
+
"test/cleanse_title_test.rb",
|
38
41
|
"test/filter_test.rb",
|
39
42
|
]
|
40
43
|
s.extra_rdoc_files = []
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#! ruby -Ku
|
2
|
+
|
3
|
+
$:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
|
4
|
+
|
5
|
+
require "test/unit"
|
6
|
+
require "bookmark_utility"
|
7
|
+
|
8
|
+
# Data-driven test for BookmarkUtility.cleanse_title.
class CleanseTitleTest < Test::Unit::TestCase
  def setup
    @module = BookmarkUtility
  end

  # Reads the fixture file, where each line holds tab-separated
  # url, title and expected-title fields, and checks that cleansing
  # the title for that url yields the expected value.
  def test_cleanse_title
    path = File.join(File.dirname(__FILE__), "..", "..", "cleanse_title_cases.txt")
    rows = File.open(path, "rb") do |io|
      io.map { |row| row.chomp.split(/\t/) }
    end

    rows.each do |url, title, expected|
      # The raw title doubles as the failure message to identify the row.
      assert_equal(expected, @module.cleanse_title(url, title), title)
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nayutaya-bookmark-utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuya Kato
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2010-01-07 00:00:00 +09:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -23,6 +23,7 @@ extra_rdoc_files: []
|
|
23
23
|
|
24
24
|
files:
|
25
25
|
- lib/bookmark_utility/canonical_table.rb
|
26
|
+
- lib/bookmark_utility/cleanse_title_table.rb
|
26
27
|
- lib/bookmark_utility/core.rb
|
27
28
|
- lib/bookmark_utility/filter_table.rb
|
28
29
|
- lib/bookmark_utility/version.rb
|
@@ -32,6 +33,7 @@ files:
|
|
32
33
|
- Rakefile
|
33
34
|
- test/alltests.rb
|
34
35
|
- test/canonical_test.rb
|
36
|
+
- test/cleanse_title_test.rb
|
35
37
|
- test/filter_test.rb
|
36
38
|
has_rdoc: true
|
37
39
|
homepage: http://github.com/nayutaya/bookmark-utility/
|
@@ -64,4 +66,5 @@ summary: bookmark-utility
|
|
64
66
|
test_files:
|
65
67
|
- test/alltests.rb
|
66
68
|
- test/canonical_test.rb
|
69
|
+
- test/cleanse_title_test.rb
|
67
70
|
- test/filter_test.rb
|