cw 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/cw.gemspec +2 -2
- data/lib/cw.rb +1 -0
- data/lib/cw/rss.rb +5 -3
- data/lib/cw/rss_clean.rb +41 -0
- metadata +4 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f32cecabf34fb0f987f2c33b7678f5d1de000655
|
4
|
+
data.tar.gz: 38ea73121579a8b0e194021e8997b191939e3bc8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a9c6b660fba1f14a7429d73879cef6cc28790ecb8136576d580e3ad388a35cfdd12d4f8fdcee7d67b0b5427bc0a290db942359d73fa73faec06f2cc119cc2d3
|
7
|
+
data.tar.gz: a0bd05411135bfdbb77dd51091445bf5d3e556c1f05a7afbe5bc60787ec8c2c276e487ec3bd60807a8f4b89b3853f25ae631de76648498f45d05e3aa8c1c3478
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.2
|
1
|
+
0.3.3
|
data/cw.gemspec
CHANGED
@@ -19,16 +19,16 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib", "audio", "data/text", "test"]
|
20
20
|
|
21
21
|
spec.required_ruby_version = '>= 2.0.0'
|
22
|
-
spec.add_runtime_dependency 'oga', '~>2.
|
22
|
+
spec.add_runtime_dependency 'oga', '~> 2.8'
|
23
23
|
spec.add_runtime_dependency 'httpclient'
|
24
24
|
spec.add_runtime_dependency 'htmlentities', '>= 4.3.4'
|
25
25
|
spec.add_runtime_dependency 'paint', '>= 1.0.1'
|
26
26
|
spec.add_runtime_dependency 'rake', '>= 11.2.2'
|
27
27
|
spec.add_runtime_dependency 'ruby-progressbar', '>= 1.8.1'
|
28
|
-
spec.add_runtime_dependency 'sanitize', '~> 4.4.0'
|
29
28
|
spec.add_runtime_dependency 'wavefile', '>= 0.7.0'
|
30
29
|
spec.add_runtime_dependency 'parseconfig', '~> 1.0.8'
|
31
30
|
spec.add_runtime_dependency 'rubyserial', '~> 0.4.0'
|
31
|
+
|
32
32
|
spec.add_dependency 'os', '~> 0.9.6'
|
33
33
|
|
34
34
|
spec.add_development_dependency 'version', '>= 1.0.0'
|
data/lib/cw.rb
CHANGED
data/lib/cw/rss.rb
CHANGED
@@ -3,7 +3,6 @@
|
|
3
3
|
require 'oga'
|
4
4
|
require 'httpclient'
|
5
5
|
require "htmlentities"
|
6
|
-
require 'sanitize'
|
7
6
|
|
8
7
|
module CWG
|
9
8
|
|
@@ -41,8 +40,11 @@ module CWG
|
|
41
40
|
unless(title.include?('VIDEO:') ||
|
42
41
|
title.include?('In pictures:') ||
|
43
42
|
title.include?('Morning business round-up'))
|
44
|
-
|
45
|
-
|
43
|
+
clean_title = CWG::RSSClean.new(title).scrub
|
44
|
+
clean_desc = CWG::RSSClean.new(description).scrub
|
45
|
+
# @rss_articles << Sanitize.clean(coder.decode(title)) + '. ' +
|
46
|
+
# Sanitize.clean(coder.decode(description))
|
47
|
+
@rss_articles << clean_title + '. ' + clean_desc
|
46
48
|
count += 1
|
47
49
|
break if count >= article_count
|
48
50
|
end
|
data/lib/cw/rss_clean.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'oga'
|
2
|
+
|
3
|
+
module CWG
|
4
|
+
class RSSClean
|
5
|
+
def initialize(html_fragment)
|
6
|
+
@html_fragment = html_fragment
|
7
|
+
end
|
8
|
+
|
9
|
+
def scrub(options = {})
|
10
|
+
blacklisted_tags = NON_CONTENT_TAGS + options.fetch(:blacklist, [])
|
11
|
+
|
12
|
+
sanitize(Oga.parse_html(html_fragment).children, blacklisted_tags)
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
attr_reader :html_fragment
|
18
|
+
|
19
|
+
NON_CONTENT_TAGS = %w(script style)
|
20
|
+
WHITESPACE_CONTENT_TAGS = %w(address article aside blockquote br dd div dl dt footer h1 h2 h3 h4 h5 h6 header hgroup hr li nav ol p pre section ul)
|
21
|
+
|
22
|
+
def sanitize(node_set, blacklisted_tags)
|
23
|
+
node_set.reject { |node| !text?(node) && blacklisted_tags.include?(node.name) }
|
24
|
+
.flat_map { |node| [whitespace(node, :prefix), text(node, blacklisted_tags), whitespace(node, :suffix)] }.join
|
25
|
+
end
|
26
|
+
|
27
|
+
def text?(node)
|
28
|
+
node.is_a?(Oga::XML::Text)
|
29
|
+
end
|
30
|
+
|
31
|
+
def whitespace(node, _position)
|
32
|
+
return ' ' if !text?(node) && WHITESPACE_CONTENT_TAGS.include?(node.name)
|
33
|
+
''
|
34
|
+
end
|
35
|
+
|
36
|
+
def text(node, blacklisted_tags)
|
37
|
+
return node.text if text?(node)
|
38
|
+
sanitize(node.children, blacklisted_tags)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martyn Jago
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '2.
|
19
|
+
version: '2.8'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '2.
|
26
|
+
version: '2.8'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: httpclient
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,20 +94,6 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: 1.8.1
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: sanitize
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: 4.4.0
|
104
|
-
type: :runtime
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - "~>"
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: 4.4.0
|
111
97
|
- !ruby/object:Gem::Dependency
|
112
98
|
name: wavefile
|
113
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -310,6 +296,7 @@ files:
|
|
310
296
|
- lib/cw/repeat_word.rb
|
311
297
|
- lib/cw/reveal.rb
|
312
298
|
- lib/cw/rss.rb
|
299
|
+
- lib/cw/rss_clean.rb
|
313
300
|
- lib/cw/sentence.rb
|
314
301
|
- lib/cw/speak.rb
|
315
302
|
- lib/cw/spoken.rb
|