diff_news 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b8fb36e0227389624d87173d0c0c6a1345a7f933
4
+ data.tar.gz: 4fa5f28152f01432fdde5b51df1774439080024c
5
+ SHA512:
6
+ metadata.gz: adca1f78c3bcf8fb43eab5b88fbe87117d98f4e56e30f577ab3d438dba9d78e8e5bcc8bce8092e948bcf3dca0515d379492bf6eac59b9ff7f1fffe62f54c7c2e
7
+ data.tar.gz: 357f4e1087332e2ff2618ea76fa427c2e732e8cffe6e22f9743dbb94683b9456f76fb96c7abe19bcd91dee361fa3fc4529b1b8ec5d3018df212ac5e63156357a
@@ -0,0 +1,42 @@
1
+ #
2
+ # Store and retrieve a history by provider and uuid
3
+ #
4
+
5
+ require 'revision'
6
+
7
module DiffNews
  # In-memory history of the revisions of one article, identified by
  # provider and uuid. `load` and `store` do nothing here; a subclass can
  # override them to persist the revisions.
  class DefaultHistory
    # Revisions collected so far, oldest first.
    attr_reader :revisions

    # provider - name of the news provider the article belongs to
    # uuid     - identifier of the article
    def initialize(provider, uuid)
      @provider = provider
      @uuid = uuid
      @revisions = []
    end

    # Nothing to load for the in-memory history.
    def load; end

    # Nothing to store for the in-memory history.
    def store; end

    # Returns true when there is no previous revision (rev1 is nil) or when
    # title, teaser or text differ between the two revisions.
    def revision_changed?(rev1, rev2)
      return true if rev1.nil?

      rev1.title != rev2.title || rev1.teaser != rev2.teaser || rev1.text != rev2.text
    end

    # Builds a revision from the given content and appends it only if it
    # differs from the latest stored revision. The revision number is the
    # number of revisions already stored.
    #
    # Returns the revisions array when a revision was appended, nil otherwise.
    def append_changed_revision(title, teaser, text)
      new_revision = Revision.new(@provider, @uuid, @revisions.count, title, teaser, text)
      @revisions << new_revision if revision_changed?(@revisions.last, new_revision)
    end

    # Removes all revisions.
    def clear
      @revisions.clear
    end
  end
end
data/lib/diff_news.rb ADDED
@@ -0,0 +1,12 @@
1
+ #
2
+ # Diff News
3
+ #
4
+
5
# Put the directory of this file at the front of the load path (unless it is
# already listed) so the library files below can be required by bare name.
lib_dir = File.expand_path(File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
6
+
7
+ require 'service'
8
+ require 'default_history'
9
+ require 'file_history'
10
+ require 'news_service/tagesanzeiger'
11
+ require 'news_service/nzz'
12
+ require 'news_service/mock'
@@ -0,0 +1,36 @@
1
+ #
2
+ # Store and retrieve a history by provider and uuid
3
+ #
4
+
5
+ require 'revision'
6
+ require 'yaml'
7
+
8
module DiffNews
  # History that keeps its revisions in a YAML file named
  # "<provider>/<uuid>.yml", relative to the current working directory.
  # The constructor and the `revisions` reader are inherited unchanged.
  class FileHistory < DefaultHistory
    # Relative path of the history file with the given extension.
    def file_path(extension)
      "#{@provider}/#{@uuid}.#{extension}"
    end

    # Replaces the revisions with the stored ones when a history file exists.
    # Does nothing when there is no file.
    def load
      path = file_path('yml')
      return unless File.exist?(path)

      stored = parse_history(File.open(path, 'r:UTF-8', &:read))
      # An empty or foreign file does not yield an Array; keep the current
      # revisions in that case so later calls such as `revisions.last` work.
      @revisions = stored if stored.is_a?(Array)
    end

    # Writes the revisions to the history file, creating the provider
    # directory first when it does not exist yet.
    def store
      return unless @revisions

      path = file_path('yml')
      directory = File.dirname(path)
      Dir.mkdir(directory) unless Dir.exist?(directory)
      File.open(path, 'w:UTF-8') { |file| file.write(YAML.dump(@revisions)) }
    end

    private

    # Deserializes a stored history.
    #
    # The file contains Struct objects, which the restricted YAML.load of
    # Psych 4 and later refuses, so the unrestricted loader is used where it
    # exists.
    # NOTE(review): unrestricted loading can instantiate arbitrary classes.
    # That is acceptable only while the history files are written by this
    # class itself; never point it at files from an untrusted source.
    def parse_history(yaml)
      YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(yaml) : YAML.load(yaml)
    end
  end
end
@@ -0,0 +1,35 @@
1
+ #
2
+ # Mock Service Implementation
3
+ #
4
+
5
+ require 'service'
6
+
7
module DiffNews
  # Service with fixed title, teaser and text. It overrides load_document
  # with an empty method, so no document is fetched for it.
  class MockService < Service
    attr_reader :title, :teaser, :text

    # Provider name of the mock service.
    def self.provider
      'mock_service'
    end

    # URLs containing "mock_me" are handled by the mock service.
    def self.valid_url
      /mock_me/
    end

    # The content is set before calling the base constructor, which reads
    # title, teaser and text while building the history.
    def initialize(url)
      @title, @teaser, @text = 'Mock Title', 'Mock Teaser', 'Mock Text'
      super(url)
    end

    # Intentionally empty: there is no document to load.
    def load_document; end

    register_service
  end
end
35
+
@@ -0,0 +1,46 @@
1
+ require 'service'
2
+
3
module DiffNews
  # Service for articles on nzz.ch. The constructor is inherited from Service.
  class NZZ < Service
    # Provider name of this service.
    def self.provider
      'nzz.ch'
    end

    # Pattern a URL has to match to be handled by this service: http or
    # https, optional "www." prefix, host nzz.ch. The dots are escaped and
    # the host must end after "nzz.ch", so look-alike hosts such as
    # "nzzxch.example" or "nzz.ch.example" are not accepted. \A anchors the
    # start of the string rather than the start of any line.
    def self.valid_url
      %r{\Ahttps?://(www\.)?nzz\.ch(?=[/?:#]|\z)}
    end

    private

    # Title of the article; empty string when it cannot be extracted.
    def title
      normalize_text(strip_html(@doc.css('.title__name').first))
    rescue
      # Best effort: a missing element must not abort the whole service.
      ''
    end

    # Lead text of the article; empty string when it cannot be extracted.
    def teaser
      normalize_text(strip_html(@doc.css('.leadtext').first))
    rescue
      ''
    end

    # Article body without script and inline poll elements; empty string
    # when it cannot be extracted.
    def text
      content = @doc.css('article.content').first

      content.css('script').remove
      content.css('.inlinePoll').remove

      normalize_text(strip_html(content))
    rescue
      ''
    end

    register_service
  end
end
46
+
@@ -0,0 +1,56 @@
1
+ require 'service'
2
+ require 'revision'
3
+
4
module DiffNews
  # Service for articles on tagesanzeiger.ch.
  class Tagesanzeiger < Service
    # The URL is normalized before the base constructor derives the uuid
    # from it.
    def initialize(url)
      super(normalize_url(url))
    end

    # Provider name of this service.
    def self.provider
      'tagesanzeiger.ch'
    end

    # Pattern a URL has to match to be handled by this service: http or
    # https, optional "www." prefix, host tagesanzeiger.ch. The dots are
    # escaped and the host must end after "tagesanzeiger.ch", so look-alike
    # hosts are not accepted. \A anchors the start of the string rather than
    # the start of any line.
    def self.valid_url
      %r{\Ahttps?://(www\.)?tagesanzeiger\.ch(?=[/?:#]|\z)}
    end

    private

    # Remove the SEO parts, making the URL more unique: when the URL has more
    # than five "/"-separated parts, only the first four and the last two are
    # kept; shorter URLs are returned unchanged.
    def normalize_url(url)
      parts = url.split('/')
      return url unless parts.count > 5

      (parts.first(4) + parts.last(2)).join('/')
    end

    # Text of the first h1 element; empty string when it cannot be extracted.
    def title
      normalize_text(strip_html(@doc.css('h1').first))
    rescue
      # Best effort: a missing element must not abort the whole service.
      ''
    end

    # Text of the first h3 element; empty string when it cannot be extracted.
    def teaser
      normalize_text(strip_html(@doc.css('h3').first))
    rescue
      ''
    end

    # Main content without script and inline poll elements; empty string
    # when it cannot be extracted.
    def text
      content = @doc.css('#mainContent').first

      content.css('script').remove
      content.css('.inlinePoll').remove

      normalize_text(strip_html(content))
    rescue
      ''
    end

    register_service
  end
end
data/lib/revision.rb ADDED
@@ -0,0 +1,7 @@
1
+ #
2
+ # POD for revision, container for the revision data stored in file or db
3
+ #
4
+
5
module DiffNews
  # Plain data container for one revision, as stored in file or db.
  #
  # The leading string argument makes Struct register the class under the
  # name Struct::Revision; DiffNews::Revision refers to that same class.
  Revision = Struct.new('Revision', *%i[provider uuid revision title teaser text])
end
data/lib/service.rb ADDED
@@ -0,0 +1,86 @@
1
+ #
2
+ # News Service Factory
3
+ #
4
+
5
+ require 'uuidtools'
6
+ require 'nokogiri'
7
+ require 'open-uri'
8
+
9
+ require 'default_history'
10
+
11
module DiffNews
  # Raised by Service.create when no registered service accepts the URL.
  class UnknownNewspageError < StandardError
  end

  # Factory and base class of the news services.
  #
  # A subclass provides the class methods `provider` and `valid_url`, the
  # instance methods `title`, `teaser` and `text`, and calls
  # `register_service` in its class body to become available to `create`.
  class Service
    # History object holding the revisions of this article.
    attr_reader :history

    # Class variables on purpose: the registry and the store class are
    # shared between Service and all of its subclasses.
    @@services = []
    @@store_class = DefaultHistory

    # Sets the history class instantiated for every service created afterwards.
    def self.store_class=(store_class)
      @@store_class = store_class
    end

    # Adds the calling class to the registry and announces it on stdout.
    def self.register_service
      puts "Registered Service Provider: #{provider}"
      @@services << self
    end

    # Instantiates the first registered service whose valid_url pattern
    # matches the URL.
    #
    # Raises UnknownNewspageError when no service matches.
    def self.create(url)
      service = @@services.find { |candidate| url =~ candidate.valid_url }
      raise UnknownNewspageError unless service

      service.new(url)
    end

    # Derives a SHA-1 based UUID from the URL, loads the document and
    # records the current content in the history.
    def initialize(url)
      @url = url
      @uuid = UUIDTools::UUID.sha1_create(UUIDTools::UUID_URL_NAMESPACE, url)
      load_document
      load_history
    end

    # Provider name as defined by the service class.
    def provider
      self.class.provider
    end

    # Latest revision in the history.
    def current_revision
      @history.revisions.last
    end

    # Converts a node to plain text: every br becomes a line break, every p
    # becomes its stripped text followed by a blank line. The replacements
    # are made in the given node itself. Returns the stripped text.
    def strip_html(partial)
      partial.css('br').each { |node| node.replace("\n") }
      partial.css('p').each { |node| node.replace("#{node.text.strip}\n\n") }
      partial.text.strip
    end

    # Strips every line, collapses runs of whitespace to a single space and
    # keeps at most one empty line in a row. Every kept line ends with "\n".
    def normalize_text(text)
      empty_lines = 0
      result = ''
      text.each_line do |line|
        line = line.strip.gsub(/\s+/, ' ')
        empty_lines = line.empty? ? empty_lines + 1 : 0
        result << line << "\n" if empty_lines < 2
      end
      result
    end

    private

    # Downloads and parses the document behind the URL.
    #
    # URI.open is used where open-uri provides it; opening a URL through
    # Kernel#open stopped working in Ruby 3.0. Older Rubies fall back to the
    # open-uri flavoured Kernel#open. The block form closes the stream once
    # the document is parsed.
    def load_document
      parse = ->(io) { Nokogiri::HTML(io) }
      @doc = URI.respond_to?(:open) ? URI.open(@url, &parse) : open(@url, &parse)
    end

    # Creates the history, loads stored revisions, appends the current
    # content if it changed and stores the result.
    def load_history
      @history = @@store_class.new(provider, @uuid)

      @history.load
      @history.append_changed_revision(title, teaser, text)
      @history.store
    end
  end
end
data/lib/version.info ADDED
@@ -0,0 +1 @@
1
+ 0.9.0
data/lib/version.rb ADDED
@@ -0,0 +1,17 @@
1
+ #
2
+ # Version of DiffNews
3
+ #
4
+
5
module DiffNews
  # Absolute path of the version.info file located next to this source file.
  def self.version_path
    File.expand_path('version.info', File.dirname(__FILE__))
  end

  # Version string taken from the first line of version.info, without the
  # line break or surrounding whitespace.
  #
  # Raises EOFError when the file is empty and Errno::ENOENT when it is missing.
  def self.version
    File.open(version_path, &:readline).strip
  end
end
14
+
15
# Global gem identification values.
# NOTE(review): presumably read by the gemspec or Rakefile; the consumer is
# not visible here - confirm before renaming any of them.
$vcs_ruby_short = 'diff_news'
$vcs_ruby_name = 'DiffNews makes changes visible'
$vcs_ruby_version = Gem::Version.new(DiffNews.version)
@@ -0,0 +1,18 @@
1
+ require 'minitest'
2
+ require 'diff_news'
3
+
4
require 'fileutils'
require 'tmpdir'

# Tests services that keep their history in DiffNews::FileHistory.
#
# FileHistory writes "<provider>/<uuid>.yml" relative to the working
# directory, so every test runs inside its own temporary directory and
# starts without stored revisions.
class FileStoreTest < Minitest::Test
  def setup
    @start_dir = Dir.pwd
    @work_dir = Dir.mktmpdir('diff_news')
    Dir.chdir(@work_dir)
    DiffNews::Service.store_class = DiffNews::FileHistory
  end

  def teardown
    # The store class is shared by all services; restore the default so other
    # test classes are not affected by the order in which tests run.
    DiffNews::Service.store_class = DiffNews::DefaultHistory
    Dir.chdir(@start_dir)
    FileUtils.remove_entry(@work_dir)
  end

  # Test methods need the "test_" prefix, otherwise Minitest never runs them.
  def test_mock_service_file_history
    provider_dir = prepare_provider_dir(DiffNews::MockService.provider)

    mock = DiffNews::Service.create 'mock_me'

    assert_equal 1, mock.history.revisions.count
    assert_equal 1, Dir.glob(File.join(provider_dir, '*.yml')).count
  end

  # Fetches a live article, so this test needs network access.
  def test_tagi_history
    prepare_provider_dir(DiffNews::Tagesanzeiger.provider)

    tagi = DiffNews::Service.create 'http://www.tagesanzeiger.ch/sport/tennis/aufschlag-smash-und-tschuess/story/10062709'

    assert_equal 1, tagi.history.revisions.count
  end

  private

  # Makes sure the directory the history file is written to exists and
  # returns its name.
  def prepare_provider_dir(provider)
    Dir.mkdir(provider) unless Dir.exist?(provider)
    provider
  end
end
@@ -0,0 +1,26 @@
1
+ require 'minitest'
2
+ require 'diff_news'
3
+
4
# Tests the individual news services with the in-memory history.
class NewsServiceTest < Minitest::Test
  def test_mock_service_history
    service = in_memory_service('mock_me')
    revisions = service.history.revisions

    assert_equal 1, revisions.count
    assert_equal "Mock Teaser", revisions.first.teaser
    assert_equal "Mock Title", service.current_revision.title
  end

  def test_tagi_history
    tagi = in_memory_service('http://www.tagesanzeiger.ch/sport/tennis/aufschlag-smash-und-tschuess/story/10062709')

    assert_equal 1, tagi.history.revisions.count
  end

  def test_nzz_history
    nzz = in_memory_service('https://www.nzz.ch/finanzen/abgang-eines-wichtigen-softwarespezialisten-die-tesla-aktie-hat-derzeit-gegenwind-ld.1304770')

    assert_equal 1, nzz.history.revisions.count
  end

  private

  # Selects DefaultHistory as store class, then creates the service for the URL.
  def in_memory_service(url)
    DiffNews::Service.store_class = DiffNews::DefaultHistory
    DiffNews::Service.create(url)
  end
end
@@ -0,0 +1,16 @@
1
+ require 'minitest'
2
+ require 'diff_news'
3
+
4
# Tests that Service.create picks the service class matching a URL.
class ServiceTest < Minitest::Test
  def setup
    # The store class is shared by all services; select the in-memory history
    # explicitly so this test does not depend on which test ran before it.
    DiffNews::Service.store_class = DiffNews::DefaultHistory
  end

  def test_create_providers
    expected_providers = {
      'http://www.tagesanzeiger.ch/sport/tennis/aufschlag-smash-und-tschuess/story/10062709' => 'tagesanzeiger.ch',
      'https://www.nzz.ch/international/europa/abstimmung-im-eu-parlament-xxx-fuer-beitrittsverhandlungen-mit-der-tuerkei-ld.1304606' => 'nzz.ch'
    }

    expected_providers.each do |url, provider|
      service = DiffNews::Service.create url
      # assert_equal takes the expected value first; the failure message
      # labels the values accordingly.
      assert_equal provider, service.provider
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # file: test/test_helper.rb
2
+ require 'minitest/autorun'
3
+ require 'minitest/reporters'
4
+
5
# Use the SpecReporter of minitest-reporters for the test output.
spec_reporter = Minitest::Reporters::SpecReporter.new
Minitest::Reporters.use!(spec_reporter)
metadata ADDED
@@ -0,0 +1,141 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: diff_news
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.0
5
+ platform: ruby
6
+ authors:
7
+ - Thomas Bruderer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-07-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.5.0
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.5'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.5.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: rake
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '11.0'
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 11.0.0
43
+ type: :development
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: '11.0'
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 11.0.0
53
+ - !ruby/object:Gem::Dependency
54
+ name: minitest
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '5.10'
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 5.0.0
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '5.10'
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 5.0.0
73
+ - !ruby/object:Gem::Dependency
74
+ name: minitest-reporters
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: '1.1'
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.0.0
83
+ type: :development
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.1'
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 1.0.0
93
+ description: ''
94
+ email: apophis@apophis.ch
95
+ executables: []
96
+ extensions: []
97
+ extra_rdoc_files: []
98
+ files:
99
+ - lib/default_history.rb
100
+ - lib/diff_news.rb
101
+ - lib/file_history.rb
102
+ - lib/news_service/mock.rb
103
+ - lib/news_service/nzz.rb
104
+ - lib/news_service/tagesanzeiger.rb
105
+ - lib/revision.rb
106
+ - lib/service.rb
107
+ - lib/version.info
108
+ - lib/version.rb
109
+ - test/file_store_test.rb
110
+ - test/news_service_test.rb
111
+ - test/service_test.rb
112
+ - test/test_helper.rb
113
+ homepage: https://github.com/FreeApophis/DiffNews
114
+ licenses:
115
+ - MIT
116
+ metadata: {}
117
+ post_install_message:
118
+ rdoc_options: []
119
+ require_paths:
120
+ - lib
121
+ required_ruby_version: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - ">="
124
+ - !ruby/object:Gem::Version
125
+ version: '2.2'
126
+ required_rubygems_version: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ requirements: []
132
+ rubyforge_project:
133
+ rubygems_version: 2.6.10
134
+ signing_key:
135
+ specification_version: 4
136
+ summary: Tool to keep track of content changes on news sites
137
+ test_files:
138
+ - test/news_service_test.rb
139
+ - test/file_store_test.rb
140
+ - test/test_helper.rb
141
+ - test/service_test.rb