diff_news 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/default_history.rb +42 -0
- data/lib/diff_news.rb +12 -0
- data/lib/file_history.rb +36 -0
- data/lib/news_service/mock.rb +35 -0
- data/lib/news_service/nzz.rb +46 -0
- data/lib/news_service/tagesanzeiger.rb +56 -0
- data/lib/revision.rb +7 -0
- data/lib/service.rb +86 -0
- data/lib/version.info +1 -0
- data/lib/version.rb +17 -0
- data/test/file_store_test.rb +18 -0
- data/test/news_service_test.rb +26 -0
- data/test/service_test.rb +16 -0
- data/test/test_helper.rb +5 -0
- metadata +141 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b8fb36e0227389624d87173d0c0c6a1345a7f933
|
4
|
+
data.tar.gz: 4fa5f28152f01432fdde5b51df1774439080024c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: adca1f78c3bcf8fb43eab5b88fbe87117d98f4e56e30f577ab3d438dba9d78e8e5bcc8bce8092e948bcf3dca0515d379492bf6eac59b9ff7f1fffe62f54c7c2e
|
7
|
+
data.tar.gz: 357f4e1087332e2ff2618ea76fa427c2e732e8cffe6e22f9743dbb94683b9456f76fb96c7abe19bcd91dee361fa3fc4529b1b8ec5d3018df212ac5e63156357a
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#
|
2
|
+
# Store and retrieve a history by provider and uuid
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'revision'
|
6
|
+
|
7
|
+
module DiffNews
|
8
|
+
|
9
|
+
# In-memory revision history of a single article, identified by the provider
# and uuid handed to the constructor.
#
# Nothing is persisted here: #load and #store are empty hooks that a subclass
# can override to add storage.
class DefaultHistory
  # @return [Array] the revisions recorded so far, oldest first
  attr_reader :revisions

  # @param provider [Object] provider identifier, passed on to every Revision
  # @param uuid [Object] article identifier, passed on to every Revision
  def initialize(provider, uuid)
    @provider = provider
    @uuid = uuid
    @revisions = []
  end

  # Persistence hook; intentionally empty in the in-memory implementation.
  #
  # @return [nil]
  def load; end

  # Persistence hook; intentionally empty in the in-memory implementation.
  #
  # @return [nil]
  def store; end

  # Tells whether rev2 differs from rev1 in title, teaser or text.
  #
  # @param rev1 [#title, #teaser, #text, nil] previous revision, nil if none
  # @param rev2 [#title, #teaser, #text] candidate revision
  # @return [Boolean] true when there is no previous revision or any of the
  #   three fields differ
  def revision_changed?(rev1, rev2)
    return true if rev1.nil?

    rev1.title != rev2.title || rev1.teaser != rev2.teaser || rev1.text != rev2.text
  end

  # Builds a Revision from the given fields and appends it only when it
  # differs from the most recent one. The current revision count is passed to
  # Revision.new as the third argument.
  #
  # @return [Array, nil] the revisions array when a revision was appended,
  #   nil when the content was unchanged
  def append_changed_revision(title, teaser, text)
    new_revision = Revision.new(@provider, @uuid, @revisions.count, title, teaser, text)
    @revisions << new_revision if revision_changed?(@revisions.last, new_revision)
  end

  # Removes every recorded revision.
  def clear
    @revisions.clear
  end
end
|
41
|
+
|
42
|
+
end
|
data/lib/diff_news.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
#
|
2
|
+
# Diff News
|
3
|
+
#
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
6
|
+
|
7
|
+
require 'service'
|
8
|
+
require 'default_history'
|
9
|
+
require 'file_history'
|
10
|
+
require 'news_service/tagesanzeiger'
|
11
|
+
require 'news_service/nzz'
|
12
|
+
require 'news_service/mock'
|
data/lib/file_history.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#
|
2
|
+
# Store and retrieve a history by provider and uuid
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'revision'
|
6
|
+
require 'yaml'
|
7
|
+
|
8
|
+
module DiffNews
|
9
|
+
|
10
|
+
# History that persists its revisions as a YAML file located at
# "<provider>/<uuid>.yml", relative to the current working directory.
#
# The constructor and the `revisions` reader are inherited unchanged from
# DefaultHistory.
class FileHistory < DefaultHistory
  # Relative path of the history file for the given extension.
  #
  # @param extension [String] file extension without the leading dot
  # @return [String] "<provider>/<uuid>.<extension>"
  def file_path(extension)
    "#{@provider}/#{@uuid}.#{extension}"
  end

  # Replaces the in-memory revisions with the stored ones, if a history file
  # exists. A file that does not deserialize to an Array (for example an empty
  # file) is ignored so that @revisions always stays an Array.
  #
  # @return [Array, nil] the loaded revisions, or nil when nothing was loaded
  def load
    path = file_path('yml')
    return unless File.exist?(path)

    # NOTE(review): YAML.load can instantiate arbitrary objects on older Psych
    # versions and rejects custom classes on Psych 4+. The file is expected to
    # be one written by #store; revisit if it can come from anywhere else.
    loaded = YAML.load(File.read(path, encoding: 'UTF-8'))
    @revisions = loaded if loaded.is_a?(Array)
  end

  # Writes the current revisions to the history file, creating the provider
  # directory first when it does not exist yet (File.open would otherwise
  # raise Errno::ENOENT on the first store for a provider).
  def store
    return unless @revisions

    path = file_path('yml')
    directory = File.dirname(path)
    Dir.mkdir(directory) unless Dir.exist?(directory)

    File.open(path, 'w:UTF-8') { |file| file.write(YAML.dump(@revisions)) }
  end
end
|
35
|
+
|
36
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#
|
2
|
+
# Mock Service Implementation
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'service'
|
6
|
+
|
7
|
+
module DiffNews
|
8
|
+
|
9
|
+
# Service stand-in that serves fixed article content and never touches the
# network; it claims every URL containing "mock_me".
class MockService < Service
  attr_reader :title, :teaser, :text

  # Provider identifier used for this service.
  def self.provider
    'mock_service'
  end

  # Pattern a URL has to match to be handled by this service.
  def self.valid_url
    /mock_me/
  end

  def initialize(url)
    # The canned fields are assigned before calling super because the parent
    # constructor reads title, teaser and text while building the history.
    @title, @teaser, @text = 'Mock Title', 'Mock Teaser', 'Mock Text'

    super(url)
  end

  # Overrides the document download with a no-op.
  def load_document; end

  register_service
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'service'
|
2
|
+
|
3
|
+
module DiffNews
|
4
|
+
|
5
|
+
# News service for nzz.ch articles. The constructor is inherited unchanged
# from Service.
class NZZ < Service
  # Provider identifier used for this service.
  def self.provider
    'nzz.ch'
  end

  # Pattern a URL has to match to be handled by this service.
  #
  # Anchored with \A (start of string, not start of any line), with the dots
  # escaped and a boundary after the host, so only http(s)://nzz.ch and
  # http(s)://www.nzz.ch are accepted and look-alike hosts are rejected.
  def self.valid_url
    %r{\Ahttps?://(www\.)?nzz\.ch(?=[/:?#]|\z)}
  end

  private

  # Headline taken from the first ".title__name" element.
  # Best effort: any StandardError (e.g. element not found) yields ''.
  def title
    normalize_text(strip_html(@doc.css('.title__name').first))
  rescue StandardError
    ''
  end

  # Lead paragraph taken from the first ".leadtext" element.
  # Best effort: any StandardError yields ''.
  def teaser
    normalize_text(strip_html(@doc.css('.leadtext').first))
  rescue StandardError
    ''
  end

  # Body text of the first "article.content" element, with scripts and
  # inline polls removed beforehand.
  # Best effort: any StandardError yields ''.
  def text
    content = @doc.css('article.content').first

    content.css('script').remove
    content.css('.inlinePoll').remove

    normalize_text(strip_html(content))
  rescue StandardError
    ''
  end

  register_service
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'service'
|
2
|
+
require 'revision'
|
3
|
+
|
4
|
+
module DiffNews
|
5
|
+
|
6
|
+
# News service for tagesanzeiger.ch articles.
class Tagesanzeiger < Service
  # Provider identifier used for this service.
  def self.provider
    'tagesanzeiger.ch'
  end

  # Pattern a URL has to match to be handled by this service.
  #
  # Anchored with \A (start of string, not start of any line), with the dots
  # escaped and a boundary after the host, so only the tagesanzeiger.ch host
  # (with or without "www.") is accepted and look-alike hosts are rejected.
  def self.valid_url
    %r{\Ahttps?://(www\.)?tagesanzeiger\.ch(?=[/:?#]|\z)}
  end

  # @param url [String] article URL; it is normalized before being handed to
  #   the parent constructor
  def initialize(url)
    super(normalize_url(url))
  end

  private

  # Remove the SEO parts, making the URL more unique.
  #
  # When the URL splits into more than five "/"-separated parts, only the
  # first four and the last two parts are kept; shorter URLs are returned
  # untouched.
  def normalize_url(url)
    parts = url.split('/')
    return url unless parts.count > 5

    (parts.first(4) + parts.last(2)).join('/')
  end

  # Headline taken from the first <h1>.
  # Best effort: any StandardError (e.g. element not found) yields ''.
  def title
    normalize_text(strip_html(@doc.css('h1').first))
  rescue StandardError
    ''
  end

  # Teaser taken from the first <h3>.
  # Best effort: any StandardError yields ''.
  def teaser
    normalize_text(strip_html(@doc.css('h3').first))
  rescue StandardError
    ''
  end

  # Body text of the first "#mainContent" element, with scripts and inline
  # polls removed beforehand.
  # Best effort: any StandardError yields ''.
  def text
    content = @doc.css('#mainContent').first

    content.css('script').remove
    content.css('.inlinePoll').remove

    normalize_text(strip_html(content))
  rescue StandardError
    ''
  end

  register_service
end
|
56
|
+
end
|
data/lib/revision.rb
ADDED
data/lib/service.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
#
|
2
|
+
# News Service Factory
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'uuidtools'
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'open-uri'
|
8
|
+
|
9
|
+
require 'default_history'
|
10
|
+
|
11
|
+
module DiffNews
|
12
|
+
|
13
|
+
# Error for a URL that no registered news service accepts.
class UnknownNewspageError < StandardError; end
|
15
|
+
|
16
|
+
# Base class and factory for news services.
#
# Subclasses implement `self.provider`, `self.valid_url`, `title`, `teaser`
# and `text`, and call `register_service` in their class body. Constructing a
# service downloads the page and records a new revision in its history when
# the content changed.
class Service
  # @return [Object] history instance created from the configured store class
  attr_reader :history

  # Class variables on purpose: the registry and the store class are shared by
  # Service and all of its subclasses.
  @@services = []
  @@store_class = DefaultHistory

  # Selects the history class instantiated by services created afterwards.
  def self.store_class=(store_class)
    @@store_class = store_class
  end

  # Adds the calling class to the registry consulted by .create and announces
  # it on standard output.
  def self.register_service
    puts "Registered Service Provider: #{provider}"
    @@services << self
  end

  # Instantiates the first registered service whose valid_url matches url.
  #
  # @param url [String] article URL
  # @return [Service] instance of the matching subclass
  # @raise [UnknownNewspageError] when no registered service matches
  def self.create(url)
    service = @@services.find { |candidate| url =~ candidate.valid_url }
    raise UnknownNewspageError unless service

    service.new(url)
  end

  # @param url [String] article URL; also the seed of the SHA-1 based uuid
  def initialize(url)
    @url = url
    @uuid = UUIDTools::UUID.sha1_create(UUIDTools::UUID_URL_NAMESPACE, url)
    load_document
    load_history
  end

  # Provider identifier of the concrete service class.
  def provider
    self.class.provider
  end

  # Most recent revision in the history, nil when there is none.
  def current_revision
    @history.revisions.last
  end

  # Reduces a parsed HTML fragment to plain text: <br> becomes a newline and
  # every <p> becomes its stripped text followed by a blank line.
  # The fragment is modified in place.
  #
  # @param partial [#css, #text] parsed node (e.g. a Nokogiri node)
  # @return [String] stripped text content
  def strip_html(partial)
    partial.css('br').each { |node| node.replace("\n") }
    partial.css('p').each { |node| node.replace("#{node.text.strip}\n\n") }
    partial.text.strip
  end

  # Strips every line, collapses whitespace runs into a single space and keeps
  # at most one empty line in a row. Every emitted line ends with "\n".
  #
  # @param text [String]
  # @return [String]
  def normalize_text(text)
    empty_lines = 0
    result = ''
    text.each_line do |line|
      line = line.strip.gsub(/\s+/, ' ')
      empty_lines = line.empty? ? empty_lines + 1 : 0
      result << line << "\n" if empty_lines < 2
    end
    result
  end

  private

  # Downloads and parses the page behind @url.
  #
  # Goes through URI#open (provided by open-uri for http/https/ftp URIs)
  # rather than Kernel#open, so a crafted string can never be treated as a
  # local file or a "|command" pipe, and the code keeps working on Ruby 3
  # where Kernel#open no longer handles URLs. The block form closes the
  # downloaded stream once it is parsed.
  def load_document
    @doc = URI.parse(@url).open { |io| Nokogiri::HTML(io) }
  end

  # Creates the history, loads what was stored, appends the freshly scraped
  # content if it changed and stores the result again.
  def load_history
    @history = @@store_class.new(provider, @uuid)

    @history.load
    @history.append_changed_revision(title, teaser, text)
    @history.store
  end
end
|
85
|
+
|
86
|
+
end
|
data/lib/version.info
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.9.0
|
data/lib/version.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#
|
2
|
+
# Version of DiffNews
|
3
|
+
#
|
4
|
+
|
5
|
+
module DiffNews
  # Absolute path of the version.info file located next to this source file.
  #
  # @return [String]
  def self.version_path
    File.expand_path('version.info', File.dirname(__FILE__))
  end

  # Version string read from the first line of version.info, without the
  # trailing newline or surrounding whitespace.
  #
  # @return [String]
  # @raise [EOFError] when version.info is empty
  # @raise [Errno::ENOENT] when version.info does not exist
  def self.version
    File.open(version_path, &:readline).strip
  end
end
|
14
|
+
|
15
|
+
# Global version metadata, set once when this file is loaded.
# NOTE(review): the "$vcs_ruby_" prefix suggests these are read by tooling
# outside this file — confirm before renaming or removing them.
$vcs_ruby_version = Gem::Version.new(DiffNews.version)
$vcs_ruby_name    = 'DiffNews makes changes visible'
$vcs_ruby_short   = 'diff_news'
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'minitest'
|
2
|
+
require 'diff_news'
|
3
|
+
|
4
|
+
# Exercises service creation with FileHistory configured as history store.
#
# NOTE(review): neither method name starts with "test_", so Minitest's
# default discovery does not run them — confirm whether that is intentional
# before renaming, since both write history files relative to the working
# directory and the second one fetches a live page.
class FileStoreTest < Minitest::Test
  # Creates the mock service on top of FileHistory and returns it.
  def mock_service_file_history
    DiffNews::Service.store_class = DiffNews::FileHistory
    DiffNews::Service.create('mock_me')
  end

  # Creates the Tagesanzeiger service on top of FileHistory and expects
  # exactly one stored revision.
  def tagi_history
    DiffNews::Service.store_class = DiffNews::FileHistory
    tagi = DiffNews::Service.create('http://www.tagesanzeiger.ch/sport/tennis/aufschlag-smash-und-tschuess/story/10062709')

    assert_equal 1, tagi.history.revisions.count
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'minitest'
|
2
|
+
require 'diff_news'
|
3
|
+
|
4
|
+
# Checks that each registered news service records exactly one revision when
# backed by the in-memory DefaultHistory.
class NewsServiceTest < Minitest::Test
  # Every test runs against the in-memory history store.
  def setup
    DiffNews::Service.store_class = DiffNews::DefaultHistory
  end

  def test_mock_service_history
    mock = DiffNews::Service.create('mock_me')
    revisions = mock.history.revisions

    assert_equal 1, revisions.count
    assert_equal "Mock Teaser", revisions.first.teaser
    assert_equal "Mock Title", mock.current_revision.title
  end

  def test_tagi_history
    article = DiffNews::Service.create('http://www.tagesanzeiger.ch/sport/tennis/aufschlag-smash-und-tschuess/story/10062709')

    assert_equal 1, article.history.revisions.count
  end

  def test_nzz_history
    article = DiffNews::Service.create('https://www.nzz.ch/finanzen/abgang-eines-wichtigen-softwarespezialisten-die-tesla-aktie-hat-derzeit-gegenwind-ld.1304770')

    assert_equal 1, article.history.revisions.count
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'minitest'
|
2
|
+
require 'diff_news'
|
3
|
+
|
4
|
+
# Verifies that the Service factory picks the right provider for a URL.
class ServiceTest < Minitest::Test
  def test_create_providers
    # URL => provider identifier the factory is expected to select
    providers = {
      'http://www.tagesanzeiger.ch/sport/tennis/aufschlag-smash-und-tschuess/story/10062709' => 'tagesanzeiger.ch',
      'https://www.nzz.ch/international/europa/abstimmung-im-eu-parlament-xxx-fuer-beitrittsverhandlungen-mit-der-tuerkei-ld.1304606' => 'nzz.ch'
    }

    providers.each do |url, provider|
      service = DiffNews::Service.create(url)
      # assert_equal takes the expected value first, so that failure messages
      # label expected and actual correctly.
      assert_equal provider, service.provider
    end
  end
end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: diff_news
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.9.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Thomas Bruderer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-07-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.5.0
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.5'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.5.0
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: rake
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '11.0'
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 11.0.0
|
43
|
+
type: :development
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '11.0'
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 11.0.0
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: minitest
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '5.10'
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 5.0.0
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '5.10'
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 5.0.0
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: minitest-reporters
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '1.1'
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.0.0
|
83
|
+
type: :development
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.1'
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 1.0.0
|
93
|
+
description: ''
|
94
|
+
email: apophis@apophis.ch
|
95
|
+
executables: []
|
96
|
+
extensions: []
|
97
|
+
extra_rdoc_files: []
|
98
|
+
files:
|
99
|
+
- lib/default_history.rb
|
100
|
+
- lib/diff_news.rb
|
101
|
+
- lib/file_history.rb
|
102
|
+
- lib/news_service/mock.rb
|
103
|
+
- lib/news_service/nzz.rb
|
104
|
+
- lib/news_service/tagesanzeiger.rb
|
105
|
+
- lib/revision.rb
|
106
|
+
- lib/service.rb
|
107
|
+
- lib/version.info
|
108
|
+
- lib/version.rb
|
109
|
+
- test/file_store_test.rb
|
110
|
+
- test/news_service_test.rb
|
111
|
+
- test/service_test.rb
|
112
|
+
- test/test_helper.rb
|
113
|
+
homepage: https://github.com/FreeApophis/DiffNews
|
114
|
+
licenses:
|
115
|
+
- MIT
|
116
|
+
metadata: {}
|
117
|
+
post_install_message:
|
118
|
+
rdoc_options: []
|
119
|
+
require_paths:
|
120
|
+
- lib
|
121
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ">="
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '2.2'
|
126
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
requirements: []
|
132
|
+
rubyforge_project:
|
133
|
+
rubygems_version: 2.6.10
|
134
|
+
signing_key:
|
135
|
+
specification_version: 4
|
136
|
+
summary: Tool to keep track of content changes on news sites
|
137
|
+
test_files:
|
138
|
+
- test/news_service_test.rb
|
139
|
+
- test/file_store_test.rb
|
140
|
+
- test/test_helper.rb
|
141
|
+
- test/service_test.rb
|