diff_news 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b8fb36e0227389624d87173d0c0c6a1345a7f933
4
+ data.tar.gz: 4fa5f28152f01432fdde5b51df1774439080024c
5
+ SHA512:
6
+ metadata.gz: adca1f78c3bcf8fb43eab5b88fbe87117d98f4e56e30f577ab3d438dba9d78e8e5bcc8bce8092e948bcf3dca0515d379492bf6eac59b9ff7f1fffe62f54c7c2e
7
+ data.tar.gz: 357f4e1087332e2ff2618ea76fa427c2e732e8cffe6e22f9743dbb94683b9456f76fb96c7abe19bcd91dee361fa3fc4529b1b8ec5d3018df212ac5e63156357a
@@ -0,0 +1,42 @@
1
+ #
2
+ # Store and retrieve a history by provider and uuid
3
+ #
4
+
5
+ require 'revision'
6
+
7
module DiffNews

  # In-memory history of the revisions of one article, identified by provider
  # and uuid. #load and #store are intentionally empty; subclasses may override
  # them to add persistence.
  class DefaultHistory
    attr_reader :revisions

    # provider - identifier of the news provider the article belongs to
    # uuid     - identifier of the article
    def initialize(provider, uuid)
      @provider = provider
      @uuid = uuid
      @revisions = []
    end

    # Nothing to load for the in-memory history.
    def load
    end

    # Nothing to store for the in-memory history.
    def store
    end

    # True when there is no previous revision (rev1 is nil) or when title,
    # teaser or text differ between the two revisions.
    def revision_changed?(rev1, rev2)
      return true if rev1.nil?

      %i[title teaser text].any? do |field|
        rev1.public_send(field) != rev2.public_send(field)
      end
    end

    # Builds a revision numbered with the current revision count and appends it
    # only when it differs from the latest stored one.
    # Returns the revision list after an append, nil otherwise.
    def append_changed_revision(title, teaser, text)
      candidate = Revision.new(@provider, @uuid, @revisions.count, title, teaser, text)
      @revisions << candidate if revision_changed?(@revisions.last, candidate)
    end

    # Removes every revision from the history.
    def clear
      @revisions.clear
    end
  end

end
data/lib/diff_news.rb ADDED
@@ -0,0 +1,12 @@
1
+ #
2
+ # Diff News
3
+ #
4
+
5
# Put the directory of this file on the load path (once) so the bare
# `require` calls below resolve against it.
lib_dir = File.expand_path(File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
6
+
7
+ require 'service'
8
+ require 'default_history'
9
+ require 'file_history'
10
+ require 'news_service/tagesanzeiger'
11
+ require 'news_service/nzz'
12
+ require 'news_service/mock'
@@ -0,0 +1,36 @@
1
+ #
2
+ # Store and retrieve a history by provider and uuid
3
+ #
4
+
5
require 'fileutils'
require 'yaml'

require 'revision'
7
+
8
module DiffNews

  # History that persists the revisions of one article as a YAML file at
  # "<provider>/<uuid>.yml" (a relative path).
  #
  # The revisions reader and the constructor are inherited unchanged from
  # DefaultHistory.
  class FileHistory < DefaultHistory
    # Relative path of the backing file for the given extension.
    def file_path(extension)
      "#{@provider}/#{@uuid}.#{extension}"
    end

    # Replaces the in-memory revisions with the stored ones when the backing
    # file exists. The current revisions are kept when the file is missing or
    # does not contain a list (e.g. an empty file), so later appends keep
    # working on an Array.
    def load
      path = file_path('yml')
      return unless File.exist?(path)

      stored = deserialize(File.open(path, 'r:UTF-8', &:read))
      @revisions = stored if stored.is_a?(Array)
    end

    # Writes the revisions as YAML, creating the provider directory first so
    # the very first store for a provider does not fail with ENOENT.
    def store
      return unless @revisions

      path = file_path('yml')
      FileUtils.mkdir_p(File.dirname(path))
      File.open(path, 'w:UTF-8') { |file| file.write(YAML.dump(@revisions)) }
    end

    private

    # Revisions are Struct instances, which the safe-by-default YAML.load of
    # Psych 4 refuses to instantiate; use unsafe_load where it exists and fall
    # back to load on older Psych versions.
    #
    # SECURITY: this can instantiate arbitrary classes. Only feed it files
    # that were written by #store, never content from an untrusted source.
    def deserialize(yaml)
      YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(yaml) : YAML.load(yaml)
    end
  end

end
@@ -0,0 +1,35 @@
1
+ #
2
+ # Mock Service Implementation
3
+ #
4
+
5
+ require 'service'
6
+
7
module DiffNews

  # Service with canned content that never fetches a document; it accepts any
  # URL containing "mock_me".
  class MockService < Service
    attr_reader :title, :teaser, :text

    # Provider identifier of the mock service.
    def self.provider
      'mock_service'
    end

    # Pattern a URL has to match to be handled by this service.
    def self.valid_url
      /mock_me/
    end

    # The canned values are assigned before calling super because the parent
    # constructor already reads title, teaser and text.
    def initialize(url)
      @title, @teaser, @text = "Mock Title", "Mock Teaser", "Mock Text"

      super(url)
    end

    # Overrides the document download with a no-op.
    def load_document
    end

    register_service
  end

end
35
+
@@ -0,0 +1,46 @@
1
+ require 'service'
2
+
3
module DiffNews

  # Service for articles on nzz.ch. Title, teaser and text are extracted from
  # the parsed document held in @doc.
  #
  # The constructor is inherited unchanged from Service.
  class NZZ < Service
    # Provider identifier of this service.
    def self.provider
      'nzz.ch'
    end

    # Pattern for URLs handled by this service.
    #
    # Anchored with \A so only the start of the whole string can match (not
    # the start of any line), with literal dots, and with a boundary after the
    # host (optional port, then "/", "?", "#" or end of string) so look-alike
    # hosts such as "nzzxch.example" or "nzz.ch.example.org" are rejected.
    def self.valid_url
      %r{\Ahttps?://(?:www\.)?nzz\.ch(?::\d+)?(?=[/?#]|\z)}
    end

    private

    # Headline of the article; '' when it cannot be extracted.
    def title
      normalize_text(strip_html(@doc.css(".title__name").first))
    rescue
      # Best effort: a missing element must not abort the whole load.
      ''
    end

    # Lead text of the article; '' when it cannot be extracted.
    def teaser
      normalize_text(strip_html(@doc.css(".leadtext").first))
    rescue
      ''
    end

    # Article body without scripts and inline polls; '' when it cannot be
    # extracted.
    def text
      content = @doc.css("article.content").first

      content.css('script').remove
      content.css('.inlinePoll').remove

      normalize_text(strip_html(content))
    rescue
      ''
    end

    register_service
  end

end
46
+
@@ -0,0 +1,56 @@
1
+ require 'service'
2
+ require 'revision'
3
+
4
module DiffNews

  # Service for articles on tagesanzeiger.ch. Title, teaser and text are
  # extracted from the parsed document held in @doc.
  class Tagesanzeiger < Service
    # The URL is normalized before it is handed to the parent constructor, so
    # the normalized form is what the parent works with.
    def initialize(url)
      super(normalize_url(url))
    end

    # Provider identifier of this service.
    def self.provider
      'tagesanzeiger.ch'
    end

    # Pattern for URLs handled by this service.
    #
    # Anchored with \A so only the start of the whole string can match (not
    # the start of any line), with literal dots, and with a boundary after the
    # host (optional port, then "/", "?", "#" or end of string) so look-alike
    # hosts such as "tagesanzeiger.ch.example.org" are rejected.
    def self.valid_url
      %r{\Ahttps?://(?:www\.)?tagesanzeiger\.ch(?::\d+)?(?=[/?#]|\z)}
    end

    private

    # Remove the SEO parts, making the URL more unique: when the URL has more
    # than five "/"-separated parts, only the first four and the last two are
    # kept; shorter URLs are returned unchanged.
    def normalize_url(url)
      parts = url.split('/')
      return url unless parts.count > 5

      (parts.first(4) + parts.last(2)).join('/')
    end

    # Headline of the article; '' when it cannot be extracted.
    def title
      normalize_text(strip_html(@doc.css("h1").first))
    rescue
      # Best effort: a missing element must not abort the whole load.
      ''
    end

    # Teaser of the article; '' when it cannot be extracted.
    def teaser
      normalize_text(strip_html(@doc.css("h3").first))
    rescue
      ''
    end

    # Article body without scripts and inline polls; '' when it cannot be
    # extracted.
    def text
      content = @doc.css("#mainContent").first

      content.css('script').remove
      content.css('.inlinePoll').remove

      normalize_text(strip_html(content))
    rescue
      ''
    end

    register_service
  end
end
data/lib/revision.rb ADDED
@@ -0,0 +1,7 @@
1
+ #
2
+ # POD for revision, container for the revision data stored in file or db
3
+ #
4
+
5
module DiffNews
  # Plain data record for one revision of an article.
  #
  # Because a class name is passed to Struct.new, the struct is also
  # registered as Struct::Revision.
  Revision = Struct.new(
    "Revision",
    *%i[provider uuid revision title teaser text]
  )
end
data/lib/service.rb ADDED
@@ -0,0 +1,86 @@
1
+ #
2
+ # News Service Factory
3
+ #
4
+
5
+ require 'uuidtools'
6
+ require 'nokogiri'
7
+ require 'open-uri'
8
+
9
+ require 'default_history'
10
+
11
module DiffNews

  # Raised by Service.create when no registered service accepts the URL.
  class UnknownNewspageError < StandardError
  end

  # Base class and factory for news services.
  #
  # Subclasses register themselves with register_service and have to provide
  # the class methods provider and valid_url as well as the instance methods
  # title, teaser and text, which are read while the history is loaded.
  class Service
    attr_reader :history

    # Class variables are shared with every subclass, which is what lets a
    # subclass add itself to the common registry from its own class body.
    @@services = []
    @@store_class = DefaultHistory

    # Provider identifier, delegated to the class-level definition.
    def provider
      self.class.provider
    end

    # Latest revision in the history, nil when the history is empty.
    def current_revision
      @history.revisions.last
    end

    # Selects the history class used by services created from now on.
    def self.store_class=(store_class)
      @@store_class = store_class
    end

    # Derives the article uuid from the URL, loads the document and records a
    # new revision when the content changed.
    def initialize(url)
      @url = url
      @uuid = UUIDTools::UUID.sha1_create(UUIDTools::UUID_URL_NAMESPACE, url)
      load_document
      load_history
    end

    # Adds the calling class to the registry consulted by Service.create.
    def self.register_service
      puts "Registered Service Provider: #{self.provider}"
      @@services << self
    end

    # Instantiates the first registered service whose valid_url matches.
    #
    # Raises UnknownNewspageError when no service matches.
    def self.create(url)
      service = @@services.find { |candidate| url =~ candidate.valid_url }
      raise UnknownNewspageError unless service

      service.new url
    end

    # Converts a document fragment to plain text: <br> becomes a newline and
    # every <p> becomes its stripped text followed by a blank line.
    # Note that the passed fragment is modified in place.
    def strip_html(partial)
      partial.css("br").each { |node| node.replace("\n") }
      partial.css("p").each { |node| node.replace("#{node.text.strip}\n\n") }
      partial.text.strip
    end

    # Strips every line, squeezes whitespace runs into a single space and
    # drops the second and further consecutive empty lines. Every kept line
    # ends with "\n".
    def normalize_text(text)
      empty_lines = 0
      # Appending to one mutable buffer avoids allocating a new string per line.
      text.each_line.with_object(''.dup) do |raw, result|
        line = raw.strip.gsub(/\s+/, ' ')
        empty_lines = line.empty? ? empty_lines + 1 : 0
        result << line << "\n" if empty_lines < 2
      end
    end

    private

    # Downloads and parses the document behind @url.
    #
    # Goes through the parsed URI instead of Kernel#open: open-uri no longer
    # patches Kernel#open since Ruby 3.0, and Kernel#open would treat a crafted
    # string as a pipe command or a local file. Strings that do not parse to an
    # openable URI (http, https, ftp) now raise instead. The block form closes
    # the stream once it has been parsed.
    def load_document
      @doc = URI.parse(@url).open { |io| Nokogiri::HTML(io) }
    end

    # Loads the stored history, appends the current content as a new revision
    # when it changed and persists the result.
    def load_history
      @history = @@store_class.new provider, @uuid

      @history.load
      @history.append_changed_revision title, teaser, text
      @history.store
    end
  end

end
data/lib/version.info ADDED
@@ -0,0 +1 @@
1
+ 0.9.0
data/lib/version.rb ADDED
@@ -0,0 +1,17 @@
1
+ #
2
+ # Version of DiffNews
3
+ #
4
+
5
module DiffNews
  # Absolute path of the version.info file located next to this source file.
  def self.version_path
    File.expand_path("../version.info", __FILE__)
  end

  # First line of version.info, exactly as IO#readline returns it.
  def self.version
    File.open(version_path) { |io| io.readline }
  end
end
14
+
15
# Globals describing this gem; the version is read from version.info when this
# file is loaded.
$vcs_ruby_version = Gem::Version.new(DiffNews.version)
$vcs_ruby_name    = 'DiffNews makes changes visible'
$vcs_ruby_short   = 'diff_news'
@@ -0,0 +1,18 @@
1
+ require 'minitest'
2
+ require 'diff_news'
3
+
4
# Exercises services backed by DiffNews::FileHistory.
#
# NOTE(review): neither method name starts with `test_`, so Minitest will
# presumably never run them; confirm whether that is intended before renaming,
# since running them performs a network request and writes history files.
class FileStoreTest < Minitest::Test

  # Creates the mock service with the file-backed history and returns it.
  def mock_service_file_history
    DiffNews::Service.store_class = DiffNews::FileHistory
    DiffNews::Service.create('mock_me')
  end

  # Creates the Tagesanzeiger service with the file-backed history and expects
  # exactly one revision.
  def tagi_history
    DiffNews::Service.store_class = DiffNews::FileHistory
    service = DiffNews::Service.create(
      'http://www.tagesanzeiger.ch/sport/tennis/aufschlag-smash-und-tschuess/story/10062709'
    )

    assert_equal 1, service.history.revisions.count
  end

end
@@ -0,0 +1,26 @@
1
+ require 'minitest'
2
+ require 'diff_news'
3
+
4
# Checks that each service records exactly one revision when it is created
# with the in-memory history.
class NewsServiceTest < Minitest::Test

  # The mock service exposes its canned title and teaser through the history.
  def test_mock_service_history
    DiffNews::Service.store_class = DiffNews::DefaultHistory
    mock = DiffNews::Service.create('mock_me')
    recorded = mock.history.revisions

    assert_equal 1, recorded.count
    assert_equal "Mock Teaser", recorded.first.teaser
    assert_equal "Mock Title", mock.current_revision.title
  end

  # Fetches a Tagesanzeiger article (needs network access).
  def test_tagi_history
    DiffNews::Service.store_class = DiffNews::DefaultHistory
    article = DiffNews::Service.create(
      'http://www.tagesanzeiger.ch/sport/tennis/aufschlag-smash-und-tschuess/story/10062709'
    )
    assert_equal 1, article.history.revisions.count
  end

  # Fetches an NZZ article (needs network access).
  def test_nzz_history
    DiffNews::Service.store_class = DiffNews::DefaultHistory
    article = DiffNews::Service.create(
      'https://www.nzz.ch/finanzen/abgang-eines-wichtigen-softwarespezialisten-die-tesla-aktie-hat-derzeit-gegenwind-ld.1304770'
    )
    assert_equal 1, article.history.revisions.count
  end
end
@@ -0,0 +1,16 @@
1
+ require 'minitest'
2
+ require 'diff_news'
3
+
4
# Checks that Service.create picks the service matching the URL.
class ServiceTest < Minitest::Test
  # Every URL has to be dispatched to the service of the expected provider.
  def test_create_providers
    # Pin the in-memory history like the other service tests do, so the
    # outcome does not depend on a store class left behind by another test.
    DiffNews::Service.store_class = DiffNews::DefaultHistory

    providers = {
      'http://www.tagesanzeiger.ch/sport/tennis/aufschlag-smash-und-tschuess/story/10062709' => 'tagesanzeiger.ch',
      'https://www.nzz.ch/international/europa/abstimmung-im-eu-parlament-xxx-fuer-beitrittsverhandlungen-mit-der-tuerkei-ld.1304606' => 'nzz.ch'
    }

    providers.each do |url, provider|
      service = DiffNews::Service.create url
      # assert_equal takes the expected value first; the swapped order would
      # label the values the wrong way round in a failure message.
      assert_equal provider, service.provider
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # file: test/test_helper.rb
2
+ require 'minitest/autorun'
3
+ require 'minitest/reporters'
4
+
5
# Report test results with the spec-style reporter.
Minitest::Reporters.use!(Minitest::Reporters::SpecReporter.new)
metadata ADDED
@@ -0,0 +1,141 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: diff_news
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.0
5
+ platform: ruby
6
+ authors:
7
+ - Thomas Bruderer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-07-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.5.0
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.5'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.5.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: rake
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '11.0'
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 11.0.0
43
+ type: :development
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: '11.0'
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 11.0.0
53
+ - !ruby/object:Gem::Dependency
54
+ name: minitest
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '5.10'
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 5.0.0
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '5.10'
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 5.0.0
73
+ - !ruby/object:Gem::Dependency
74
+ name: minitest-reporters
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: '1.1'
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.0.0
83
+ type: :development
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.1'
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 1.0.0
93
+ description: ''
94
+ email: apophis@apophis.ch
95
+ executables: []
96
+ extensions: []
97
+ extra_rdoc_files: []
98
+ files:
99
+ - lib/default_history.rb
100
+ - lib/diff_news.rb
101
+ - lib/file_history.rb
102
+ - lib/news_service/mock.rb
103
+ - lib/news_service/nzz.rb
104
+ - lib/news_service/tagesanzeiger.rb
105
+ - lib/revision.rb
106
+ - lib/service.rb
107
+ - lib/version.info
108
+ - lib/version.rb
109
+ - test/file_store_test.rb
110
+ - test/news_service_test.rb
111
+ - test/service_test.rb
112
+ - test/test_helper.rb
113
+ homepage: https://github.com/FreeApophis/DiffNews
114
+ licenses:
115
+ - MIT
116
+ metadata: {}
117
+ post_install_message:
118
+ rdoc_options: []
119
+ require_paths:
120
+ - lib
121
+ required_ruby_version: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - ">="
124
+ - !ruby/object:Gem::Version
125
+ version: '2.2'
126
+ required_rubygems_version: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ requirements: []
132
+ rubyforge_project:
133
+ rubygems_version: 2.6.10
134
+ signing_key:
135
+ specification_version: 4
136
+ summary: Tool to keep track of content changes on news sites
137
+ test_files:
138
+ - test/news_service_test.rb
139
+ - test/file_store_test.rb
140
+ - test/test_helper.rb
141
+ - test/service_test.rb