diff_news 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/default_history.rb +42 -0
- data/lib/diff_news.rb +12 -0
- data/lib/file_history.rb +36 -0
- data/lib/news_service/mock.rb +35 -0
- data/lib/news_service/nzz.rb +46 -0
- data/lib/news_service/tagesanzeiger.rb +56 -0
- data/lib/revision.rb +7 -0
- data/lib/service.rb +86 -0
- data/lib/version.info +1 -0
- data/lib/version.rb +17 -0
- data/test/file_store_test.rb +18 -0
- data/test/news_service_test.rb +26 -0
- data/test/service_test.rb +16 -0
- data/test/test_helper.rb +5 -0
- metadata +141 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b8fb36e0227389624d87173d0c0c6a1345a7f933
|
4
|
+
data.tar.gz: 4fa5f28152f01432fdde5b51df1774439080024c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: adca1f78c3bcf8fb43eab5b88fbe87117d98f4e56e30f577ab3d438dba9d78e8e5bcc8bce8092e948bcf3dca0515d379492bf6eac59b9ff7f1fffe62f54c7c2e
|
7
|
+
data.tar.gz: 357f4e1087332e2ff2618ea76fa427c2e732e8cffe6e22f9743dbb94683b9456f76fb96c7abe19bcd91dee361fa3fc4529b1b8ec5d3018df212ac5e63156357a
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#
|
2
|
+
# Store and retrieve a history by provider and uuid
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'revision'
|
6
|
+
|
7
|
+
module DiffNews

  # In-memory history of the revisions recorded for one article, which is
  # identified by a provider and a uuid.
  #
  # #load and #store are empty here, so nothing is persisted by this class;
  # they exist as hooks that a subclass can override.
  class DefaultHistory
    # @return [Array] the recorded revisions, oldest first
    attr_reader :revisions

    # @param provider provider identifier handed to every Revision created here
    # @param uuid     article identifier handed to every Revision created here
    def initialize(provider, uuid)
      @provider = provider
      @uuid = uuid

      @revisions = []
    end

    # Persistence hook for reading revisions; does nothing in this class.
    def load
    end

    # Persistence hook for writing revisions; does nothing in this class.
    def store
    end

    # Tells whether rev2 differs from rev1 in title, teaser or text.
    #
    # @param rev1 the previous revision, or nil when there is none yet
    # @param rev2 the candidate revision
    # @return [Boolean] true when rev1 is nil or any of the three fields differ
    def revision_changed?(rev1, rev2)
      return true if rev1.nil?

      rev1.title != rev2.title || rev1.teaser != rev2.teaser || rev1.text != rev2.text
    end

    # Builds a Revision from the given content and appends it only when it
    # differs from the most recent revision (or when there is none yet).
    # The new revision is numbered with the current revision count.
    #
    # @return [Array, nil] the revisions array when appended, nil otherwise
    def append_changed_revision(title, teaser, text)
      new_revision = Revision.new(@provider, @uuid, @revisions.count, title, teaser, text)
      @revisions << new_revision if revision_changed?(@revisions.last, new_revision)
    end

    # Removes all recorded revisions.
    def clear
      @revisions.clear
    end
  end

end
|
data/lib/diff_news.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
#
|
2
|
+
# Diff News
|
3
|
+
#
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
6
|
+
|
7
|
+
require 'service'
|
8
|
+
require 'default_history'
|
9
|
+
require 'file_history'
|
10
|
+
require 'news_service/tagesanzeiger'
|
11
|
+
require 'news_service/nzz'
|
12
|
+
require 'news_service/mock'
|
data/lib/file_history.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#
|
2
|
+
# Store and retrieve a history by provider and uuid
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'revision'
|
6
|
+
require 'yaml'
|
7
|
+
|
8
|
+
module DiffNews

  # History that keeps its revisions in a YAML file named
  # "<provider>/<uuid>.yml" (a relative path, so it resolves against the
  # current working directory).
  #
  # The constructor and the revisions reader are inherited unchanged from
  # DefaultHistory.
  class FileHistory < DefaultHistory
    # @param extension [String] file extension without the leading dot
    # @return [String] relative path of this history's file
    def file_path(extension)
      "#{@provider}/#{@uuid}.#{extension}"
    end

    # Replaces the revisions with the content of the YAML file, if it exists.
    # Does nothing when the file is missing.
    def load
      path = file_path('yml')
      return unless File.exist?(path)

      data = File.read(path, encoding: 'UTF-8')
      # NOTE(review): the file holds serialized Ruby objects, which Psych 4's
      # YAML.load refuses by default; unsafe_load restores the former
      # behaviour. Only files written by #store should ever be loaded here.
      loaded = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(data) : YAML.load(data)
      # An empty document parses to a falsy value; keep revisions an Array.
      @revisions = loaded || []
    end

    # Writes the revisions to the YAML file, creating the provider directory
    # first when it does not exist yet.
    def store
      return unless @revisions

      path = file_path('yml')
      directory = File.dirname(path)
      Dir.mkdir(directory) unless Dir.exist?(directory)

      File.open(path, 'w:UTF-8') { |file| file.write(YAML.dump(@revisions)) }
    end
  end

end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#
|
2
|
+
# Mock Service Implementation
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'service'
|
6
|
+
|
7
|
+
module DiffNews

  # Service with fixed title, teaser and text. It registers itself for URLs
  # matching /mock_me/ and overrides load_document with an empty body.
  class MockService < Service
    attr_reader :title, :teaser, :text

    # @return [String] provider identifier of the mock
    def self.provider
      'mock_service'
    end

    # @return [Regexp] pattern of the URLs handled by the mock
    def self.valid_url
      /mock_me/
    end

    # The content is assigned before calling super because the parent
    # constructor already reads title, teaser and text.
    def initialize(url)
      @title, @teaser, @text = "Mock Title", "Mock Teaser", "Mock Text"

      super(url)
    end

    # Intentionally empty: the mock has no document to load.
    def load_document
    end

    register_service
  end

end
|
35
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'service'
|
2
|
+
|
3
|
+
module DiffNews

  # Service for articles on nzz.ch.
  class NZZ < Service
    def initialize(url)
      super(url)
    end

    # @return [String] provider identifier
    def self.provider
      'nzz.ch'
    end

    # Pattern of the URLs handled by this service.
    #
    # The dots are escaped and the pattern is anchored to the start of the
    # string; the lookahead requires the host name to end right after
    # "nzz.ch", so hosts such as "nzz.ch.example.com" are not accepted.
    #
    # @return [Regexp]
    def self.valid_url
      %r{\Ahttps?://(www\.)?nzz\.ch(?=[/:?#]|\z)}
    end

    private

    # Text of the first ".title__name" element.
    # Best effort: any StandardError (e.g. no such element) yields ''.
    def title
      normalize_text(strip_html(@doc.css(".title__name").first))
    rescue StandardError
      ''
    end

    # Text of the first ".leadtext" element.
    # Best effort: any StandardError (e.g. no such element) yields ''.
    def teaser
      normalize_text(strip_html(@doc.css(".leadtext").first))
    rescue StandardError
      ''
    end

    # Text of the first "article.content" element, after removing scripts
    # and ".inlinePoll" elements from it.
    # Best effort: any StandardError (e.g. no such element) yields ''.
    def text
      content = @doc.css("article.content").first

      content.css('script').remove
      content.css('.inlinePoll').remove

      normalize_text(strip_html(content))
    rescue StandardError
      ''
    end

    register_service
  end

end
|
46
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'service'
|
2
|
+
require 'revision'
|
3
|
+
|
4
|
+
module DiffNews

  # Service for articles on tagesanzeiger.ch.
  class Tagesanzeiger < Service
    # The URL is normalized before it reaches the parent constructor.
    def initialize(url)
      super(normalize_url(url))
    end

    # @return [String] provider identifier
    def self.provider
      'tagesanzeiger.ch'
    end

    # Pattern of the URLs handled by this service.
    #
    # The dots are escaped and the pattern is anchored to the start of the
    # string; the lookahead requires the host name to end right after
    # "tagesanzeiger.ch", so longer host names are not accepted.
    #
    # @return [Regexp]
    def self.valid_url
      %r{\Ahttps?://(www\.)?tagesanzeiger\.ch(?=[/:?#]|\z)}
    end

    private

    # Remove the SEO parts, making the URL more unique.
    #
    # When the URL splits into more than five "/"-separated parts, only the
    # first four and the last two are kept; shorter URLs are returned as is.
    def normalize_url(url)
      parts = url.split('/')
      return url unless parts.count > 5

      (parts.first(4) + parts.last(2)).join('/')
    end

    # Text of the first "h1" element.
    # Best effort: any StandardError (e.g. no such element) yields ''.
    def title
      normalize_text(strip_html(@doc.css("h1").first))
    rescue StandardError
      ''
    end

    # Text of the first "h3" element.
    # Best effort: any StandardError (e.g. no such element) yields ''.
    def teaser
      normalize_text(strip_html(@doc.css("h3").first))
    rescue StandardError
      ''
    end

    # Text of the first "#mainContent" element, after removing scripts and
    # ".inlinePoll" elements from it.
    # Best effort: any StandardError (e.g. no such element) yields ''.
    def text
      content = @doc.css("#mainContent").first

      content.css('script').remove
      content.css('.inlinePoll').remove

      normalize_text(strip_html(content))
    rescue StandardError
      ''
    end

    register_service
  end
end
|
data/lib/revision.rb
ADDED
data/lib/service.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
#
|
2
|
+
# News Service Factory
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'uuidtools'
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'open-uri'
|
8
|
+
|
9
|
+
require 'default_history'
|
10
|
+
|
11
|
+
module DiffNews

  # Raised when no registered service accepts a URL, or when the URL cannot
  # be opened over the network.
  class UnknownNewspageError < StandardError
  end

  # Base class and factory of the news services.
  #
  # Subclasses provide the class methods provider and valid_url and the
  # instance methods title, teaser and text, and call register_service in
  # their class body. Constructing a service loads the document and records
  # a new revision in the history when the content changed.
  class Service
    # @return the history object created by the configured store class
    attr_reader :history

    # Class variables are shared with the subclasses, so every subclass
    # registers into this one list and sees the same store class.
    @@services = []
    @@store_class = DefaultHistory

    # @return the provider identifier of the concrete service class
    def provider
      self.class.provider
    end

    # @return the most recent revision of the history, or nil if there is none
    def current_revision
      @history.revisions.last
    end

    # Selects the history class instantiated by services created afterwards.
    def self.store_class=(store_class)
      @@store_class = store_class
    end

    # @param url [String] address of the article; also the input of the
    #   SHA-1 based UUID (URL namespace) identifying the article
    def initialize(url)
      @url = url
      @uuid = UUIDTools::UUID.sha1_create(UUIDTools::UUID_URL_NAMESPACE, url)
      load_document
      load_history
    end

    # Adds the calling class to the list consulted by Service.create.
    def self.register_service
      puts "Registered Service Provider: #{self.provider}"
      @@services << self
    end

    # Instantiates the first registered service whose valid_url matches.
    #
    # @param url [String]
    # @raise [UnknownNewspageError] when no registered service matches
    def self.create(url)
      service_class = @@services.find { |s| url =~ s.valid_url }
      raise UnknownNewspageError unless service_class

      service_class.new(url)
    end

    # Converts a parsed HTML node to plain text. "br" elements become line
    # breaks and "p" elements become their stripped text followed by an
    # empty line. The node passed in is modified in place.
    #
    # @return [String] the stripped text of the node
    def strip_html(partial)
      partial.css("br").each { |node| node.replace("\n") }
      partial.css("p").each { |node| node.replace("#{node.text.strip}\n\n") }
      partial.text.strip
    end

    # Strips every line, collapses runs of whitespace to one space and drops
    # the second and later lines of a run of empty lines. Every kept line
    # ends with "\n".
    #
    # @param text [String]
    # @return [String]
    def normalize_text(text)
      empty_lines = 0
      result = ''.dup
      text.each_line do |line|
        line = line.strip.gsub(/\s+/, ' ')
        empty_lines = line.empty? ? empty_lines + 1 : 0
        result << line << "\n" if empty_lines < 2
      end
      result
    end

    private

    # Fetches and parses the document at @url.
    #
    # The URL is opened through its parsed URI object rather than through
    # Kernel#open, so a string that is not a network URL can never be
    # treated as a local file or a command; the stream is closed as soon as
    # the document is parsed.
    #
    # @raise [UnknownNewspageError] when the URL's scheme cannot be opened
    def load_document
      uri = URI.parse(@url)
      raise UnknownNewspageError, "Cannot open URL: #{@url}" unless uri.respond_to?(:open)

      @doc = uri.open { |io| Nokogiri::HTML(io) }
    end

    # Creates the history, loads what is stored, appends the current content
    # if it changed and stores the result.
    def load_history
      @history = @@store_class.new(provider, @uuid)

      @history.load
      @history.append_changed_revision(title, teaser, text)
      @history.store
    end
  end

end
|
data/lib/version.info
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.9.0
|
data/lib/version.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#
|
2
|
+
# Version of DiffNews
|
3
|
+
#
|
4
|
+
|
5
|
+
module DiffNews
  # @return [String] absolute path of the "version.info" file located in
  #   the same directory as this source file
  def self.version_path
    File.expand_path("version.info", File.dirname(__FILE__))
  end

  # Reads the version from the first line of the version file.
  #
  # @return [String] the first line without surrounding whitespace, so a
  #   trailing newline in the file does not end up in the version string
  # @raise [EOFError] when the version file is empty
  def self.version
    File.open(version_path, &:readline).strip
  end
end
|
14
|
+
|
15
|
+
# Global gem metadata: parsed version, descriptive name and short name.
$vcs_ruby_version = Gem::Version.new(DiffNews.version)
$vcs_ruby_name    = 'DiffNews makes changes visible'
$vcs_ruby_short   = 'diff_news'
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'minitest'
|
2
|
+
require 'diff_news'
|
3
|
+
|
4
|
+
# Exercises services backed by DiffNews::FileHistory.
#
# NOTE(review): neither method name starts with "test_", so Minitest's
# default test discovery does not run them - confirm whether that is intended.
class FileStoreTest < Minitest::Test
  # Creates the mock service with the file-based history and returns it.
  def mock_service_file_history
    DiffNews::Service.store_class = DiffNews::FileHistory
    DiffNews::Service.create('mock_me')
  end

  # Creates a Tagesanzeiger service with the file-based history and expects
  # exactly one revision in its history.
  def tagi_history
    DiffNews::Service.store_class = DiffNews::FileHistory
    tagi = DiffNews::Service.create('http://www.tagesanzeiger.ch/sport/tennis/aufschlag-smash-und-tschuess/story/10062709')

    assert_equal(1, tagi.history.revisions.count)
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'minitest'
|
2
|
+
require 'diff_news'
|
3
|
+
|
4
|
+
# Checks that a freshly created service records exactly one revision when
# the in-memory DefaultHistory is used.
class NewsServiceTest < Minitest::Test
  # Every test runs against the in-memory history.
  def setup
    DiffNews::Service.store_class = DiffNews::DefaultHistory
  end

  # The mock service exposes its fixed content through the history.
  def test_mock_service_history
    service = DiffNews::Service.create('mock_me')
    revisions = service.history.revisions

    assert_equal(1, revisions.count)
    assert_equal("Mock Teaser", revisions.first.teaser)
    assert_equal("Mock Title", service.current_revision.title)
  end

  def test_tagi_history
    tagi = DiffNews::Service.create('http://www.tagesanzeiger.ch/sport/tennis/aufschlag-smash-und-tschuess/story/10062709')

    assert_equal(1, tagi.history.revisions.count)
  end

  def test_nzz_history
    nzz = DiffNews::Service.create('https://www.nzz.ch/finanzen/abgang-eines-wichtigen-softwarespezialisten-die-tesla-aktie-hat-derzeit-gegenwind-ld.1304770')

    assert_equal(1, nzz.history.revisions.count)
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'minitest'
|
2
|
+
require 'diff_news'
|
3
|
+
|
4
|
+
# Checks the factory: Service.create must return a service whose provider
# corresponds to the host of the given URL.
class ServiceTest < Minitest::Test
  def test_create_providers
    # URL => provider identifier expected from the created service
    providers = {
      'http://www.tagesanzeiger.ch/sport/tennis/aufschlag-smash-und-tschuess/story/10062709' => 'tagesanzeiger.ch',
      'https://www.nzz.ch/international/europa/abstimmung-im-eu-parlament-xxx-fuer-beitrittsverhandlungen-mit-der-tuerkei-ld.1304606' => 'nzz.ch'
    }

    providers.each do |url, provider|
      service = DiffNews::Service.create(url)
      # assert_equal takes the expected value first, so that a failure
      # message labels expected and actual correctly.
      assert_equal provider, service.provider
    end
  end
end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: diff_news
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.9.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Thomas Bruderer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-07-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.5.0
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.5'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.5.0
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: rake
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '11.0'
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 11.0.0
|
43
|
+
type: :development
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '11.0'
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 11.0.0
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: minitest
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '5.10'
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 5.0.0
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '5.10'
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 5.0.0
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: minitest-reporters
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '1.1'
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.0.0
|
83
|
+
type: :development
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.1'
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 1.0.0
|
93
|
+
description: ''
|
94
|
+
email: apophis@apophis.ch
|
95
|
+
executables: []
|
96
|
+
extensions: []
|
97
|
+
extra_rdoc_files: []
|
98
|
+
files:
|
99
|
+
- lib/default_history.rb
|
100
|
+
- lib/diff_news.rb
|
101
|
+
- lib/file_history.rb
|
102
|
+
- lib/news_service/mock.rb
|
103
|
+
- lib/news_service/nzz.rb
|
104
|
+
- lib/news_service/tagesanzeiger.rb
|
105
|
+
- lib/revision.rb
|
106
|
+
- lib/service.rb
|
107
|
+
- lib/version.info
|
108
|
+
- lib/version.rb
|
109
|
+
- test/file_store_test.rb
|
110
|
+
- test/news_service_test.rb
|
111
|
+
- test/service_test.rb
|
112
|
+
- test/test_helper.rb
|
113
|
+
homepage: https://github.com/FreeApophis/DiffNews
|
114
|
+
licenses:
|
115
|
+
- MIT
|
116
|
+
metadata: {}
|
117
|
+
post_install_message:
|
118
|
+
rdoc_options: []
|
119
|
+
require_paths:
|
120
|
+
- lib
|
121
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ">="
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '2.2'
|
126
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
requirements: []
|
132
|
+
rubyforge_project:
|
133
|
+
rubygems_version: 2.6.10
|
134
|
+
signing_key:
|
135
|
+
specification_version: 4
|
136
|
+
summary: Tool to keep track of content changes on news sites
|
137
|
+
test_files:
|
138
|
+
- test/news_service_test.rb
|
139
|
+
- test/file_store_test.rb
|
140
|
+
- test/test_helper.rb
|
141
|
+
- test/service_test.rb
|