cw 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bb7266c839627f6135e9adf8e83b3e59f5112177
4
- data.tar.gz: b0da0ee8068f18450a1fedfee2e0b0df2d3b4053
3
+ metadata.gz: f32cecabf34fb0f987f2c33b7678f5d1de000655
4
+ data.tar.gz: 38ea73121579a8b0e194021e8997b191939e3bc8
5
5
  SHA512:
6
- metadata.gz: edadbf29105e8f07f58d899b9446bcd7e23dd0d70e9c9f455b4b93182ed21418a0dda8a813651f2461e493a2e9b4ac55240e20710de157de1582778761de16a3
7
- data.tar.gz: f1ec5443a2fd3a574460318171438d77511a4450f273f44d03b8f9674c26b38b01b30dfed56dff53c84dc7a06d4cb3dc7cdfebeccfc089885727e17f6901309c
6
+ metadata.gz: 3a9c6b660fba1f14a7429d73879cef6cc28790ecb8136576d580e3ad388a35cfdd12d4f8fdcee7d67b0b5427bc0a290db942359d73fa73faec06f2cc119cc2d3
7
+ data.tar.gz: a0bd05411135bfdbb77dd51091445bf5d3e556c1f05a7afbe5bc60787ec8c2c276e487ec3bd60807a8f4b89b3853f25ae631de76648498f45d05e3aa8c1c3478
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.2
1
+ 0.3.3
data/cw.gemspec CHANGED
@@ -19,16 +19,16 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib", "audio", "data/text", "test"]
20
20
 
21
21
  spec.required_ruby_version = '>= 2.0.0'
22
- spec.add_runtime_dependency 'oga', '~>2.7'
22
+ spec.add_runtime_dependency 'oga', '~> 2.8'
23
23
  spec.add_runtime_dependency 'httpclient'
24
24
  spec.add_runtime_dependency 'htmlentities', '>= 4.3.4'
25
25
  spec.add_runtime_dependency 'paint', '>= 1.0.1'
26
26
  spec.add_runtime_dependency 'rake', '>= 11.2.2'
27
27
  spec.add_runtime_dependency 'ruby-progressbar', '>= 1.8.1'
28
- spec.add_runtime_dependency 'sanitize', '~> 4.4.0'
29
28
  spec.add_runtime_dependency 'wavefile', '>= 0.7.0'
30
29
  spec.add_runtime_dependency 'parseconfig', '~> 1.0.8'
31
30
  spec.add_runtime_dependency 'rubyserial', '~> 0.4.0'
31
+
32
32
  spec.add_dependency 'os', '~> 0.9.6'
33
33
 
34
34
  spec.add_development_dependency 'version', '>= 1.0.0'
data/lib/cw.rb CHANGED
@@ -16,6 +16,7 @@ require_relative 'cw/alphabet'
16
16
  require_relative 'cw/numbers'
17
17
  require_relative 'cw/str'
18
18
  require_relative 'cw/rss'
19
+ require_relative 'cw/rss_clean'
19
20
  require_relative 'cw/words'
20
21
  require_relative 'cw/cl'
21
22
  require_relative 'cw/key_input'
@@ -3,7 +3,6 @@
3
3
  require 'oga'
4
4
  require 'httpclient'
5
5
  require "htmlentities"
6
- require 'sanitize'
7
6
 
8
7
  module CWG
9
8
 
@@ -41,8 +40,11 @@ module CWG
41
40
  unless(title.include?('VIDEO:') ||
42
41
  title.include?('In pictures:') ||
43
42
  title.include?('Morning business round-up'))
44
- @rss_articles << Sanitize.clean(coder.decode(title)) + '. ' +
45
- Sanitize.clean(coder.decode(description))
43
+ clean_title = CWG::RSSClean.new(title).scrub
44
+ clean_desc = CWG::RSSClean.new(description).scrub
45
+ # @rss_articles << Sanitize.clean(coder.decode(title)) + '. ' +
46
+ # Sanitize.clean(coder.decode(description))
47
+ @rss_articles << clean_title + '. ' + clean_desc
46
48
  count += 1
47
49
  break if count >= article_count
48
50
  end
@@ -0,0 +1,41 @@
1
+ require 'oga'
2
+
3
module CWG
  # Reduces an HTML fragment (for example an RSS title or description) to
  # plain text.
  #
  # The fragment is parsed with Oga and walked recursively:
  # * text nodes contribute their text;
  # * elements contribute the cleaned text of their children, wrapped in a
  #   single space on each side when the tag is listed in
  #   WHITESPACE_CONTENT_TAGS, so neighbouring blocks do not run together;
  # * elements listed in NON_CONTENT_TAGS (or in the caller's :blacklist) are
  #   dropped together with everything inside them;
  # * any other node kind (comments, CDATA, processing instructions)
  #   contributes nothing.
  class RSSClean
    # Tags whose whole subtree is discarded.
    NON_CONTENT_TAGS = %w(script style).freeze

    # Tags that get a space emitted before and after their content.
    WHITESPACE_CONTENT_TAGS = %w(address article aside blockquote br dd div dl dt footer h1 h2 h3 h4 h5 h6 header hgroup hr li nav ol p pre section ul).freeze

    # @param html_fragment [String, nil] markup to be cleaned
    def initialize(html_fragment)
      @html_fragment = html_fragment
    end

    # Returns the plain-text rendering of the fragment.
    #
    # @param options [Hash]
    # @option options [Array<String, Symbol>] :blacklist extra tag names to
    #   drop (with their content) in addition to NON_CONTENT_TAGS
    # @return [String] cleaned text; empty when the fragment is nil
    def scrub(options = {})
      return '' if html_fragment.nil?

      # Array() tolerates an explicit nil; names are normalised to lower-case
      # strings so they compare equal to the normalised tag names below.
      extra_tags = Array(options.fetch(:blacklist, [])).map { |tag| tag.to_s.downcase }
      blacklisted_tags = NON_CONTENT_TAGS + extra_tags

      sanitize(Oga.parse_html(html_fragment).children, blacklisted_tags)
    end

    private

    attr_reader :html_fragment

    # Concatenates the cleaned text of every non-blacklisted node in node_set.
    def sanitize(node_set, blacklisted_tags)
      node_set
        .reject { |node| element?(node) && blacklisted_tags.include?(tag_name(node)) }
        .flat_map { |node| [whitespace(node, :prefix), text(node, blacklisted_tags), whitespace(node, :suffix)] }
        .join
    end

    def text?(node)
      node.is_a?(Oga::XML::Text)
    end

    # Only elements carry a tag name; asking a comment or CDATA node for
    # #name raises NoMethodError, so every name lookup is guarded by this.
    def element?(node)
      node.is_a?(Oga::XML::Element)
    end

    # HTML tag names are case-insensitive, so compare in lower case.
    def tag_name(node)
      node.name.to_s.downcase
    end

    # Padding emitted around an element; the same on both sides, hence the
    # position argument is currently unused.
    def whitespace(node, _position)
      return ' ' if element?(node) && WHITESPACE_CONTENT_TAGS.include?(tag_name(node))
      ''
    end

    # Text contributed by a single node (see the class comment for the rules).
    def text(node, blacklisted_tags)
      return node.text if text?(node)
      return '' unless element?(node)
      sanitize(node.children, blacklisted_tags)
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martyn Jago
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.7'
19
+ version: '2.8'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.7'
26
+ version: '2.8'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: httpclient
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -94,20 +94,6 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: 1.8.1
97
- - !ruby/object:Gem::Dependency
98
- name: sanitize
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: 4.4.0
104
- type: :runtime
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - "~>"
109
- - !ruby/object:Gem::Version
110
- version: 4.4.0
111
97
  - !ruby/object:Gem::Dependency
112
98
  name: wavefile
113
99
  requirement: !ruby/object:Gem::Requirement
@@ -310,6 +296,7 @@ files:
310
296
  - lib/cw/repeat_word.rb
311
297
  - lib/cw/reveal.rb
312
298
  - lib/cw/rss.rb
299
+ - lib/cw/rss_clean.rb
313
300
  - lib/cw/sentence.rb
314
301
  - lib/cw/speak.rb
315
302
  - lib/cw/spoken.rb