cw 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bb7266c839627f6135e9adf8e83b3e59f5112177
4
- data.tar.gz: b0da0ee8068f18450a1fedfee2e0b0df2d3b4053
3
+ metadata.gz: f32cecabf34fb0f987f2c33b7678f5d1de000655
4
+ data.tar.gz: 38ea73121579a8b0e194021e8997b191939e3bc8
5
5
  SHA512:
6
- metadata.gz: edadbf29105e8f07f58d899b9446bcd7e23dd0d70e9c9f455b4b93182ed21418a0dda8a813651f2461e493a2e9b4ac55240e20710de157de1582778761de16a3
7
- data.tar.gz: f1ec5443a2fd3a574460318171438d77511a4450f273f44d03b8f9674c26b38b01b30dfed56dff53c84dc7a06d4cb3dc7cdfebeccfc089885727e17f6901309c
6
+ metadata.gz: 3a9c6b660fba1f14a7429d73879cef6cc28790ecb8136576d580e3ad388a35cfdd12d4f8fdcee7d67b0b5427bc0a290db942359d73fa73faec06f2cc119cc2d3
7
+ data.tar.gz: a0bd05411135bfdbb77dd51091445bf5d3e556c1f05a7afbe5bc60787ec8c2c276e487ec3bd60807a8f4b89b3853f25ae631de76648498f45d05e3aa8c1c3478
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.2
1
+ 0.3.3
data/cw.gemspec CHANGED
@@ -19,16 +19,16 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib", "audio", "data/text", "test"]
20
20
 
21
21
  spec.required_ruby_version = '>= 2.0.0'
22
- spec.add_runtime_dependency 'oga', '~>2.7'
22
+ spec.add_runtime_dependency 'oga', '~> 2.8'
23
23
  spec.add_runtime_dependency 'httpclient'
24
24
  spec.add_runtime_dependency 'htmlentities', '>= 4.3.4'
25
25
  spec.add_runtime_dependency 'paint', '>= 1.0.1'
26
26
  spec.add_runtime_dependency 'rake', '>= 11.2.2'
27
27
  spec.add_runtime_dependency 'ruby-progressbar', '>= 1.8.1'
28
- spec.add_runtime_dependency 'sanitize', '~> 4.4.0'
29
28
  spec.add_runtime_dependency 'wavefile', '>= 0.7.0'
30
29
  spec.add_runtime_dependency 'parseconfig', '~> 1.0.8'
31
30
  spec.add_runtime_dependency 'rubyserial', '~> 0.4.0'
31
+
32
32
  spec.add_dependency 'os', '~> 0.9.6'
33
33
 
34
34
  spec.add_development_dependency 'version', '>= 1.0.0'
data/lib/cw.rb CHANGED
@@ -16,6 +16,7 @@ require_relative 'cw/alphabet'
16
16
  require_relative 'cw/numbers'
17
17
  require_relative 'cw/str'
18
18
  require_relative 'cw/rss'
19
+ require_relative 'cw/rss_clean'
19
20
  require_relative 'cw/words'
20
21
  require_relative 'cw/cl'
21
22
  require_relative 'cw/key_input'
@@ -3,7 +3,6 @@
3
3
  require 'oga'
4
4
  require 'httpclient'
5
5
  require "htmlentities"
6
- require 'sanitize'
7
6
 
8
7
  module CWG
9
8
 
@@ -41,8 +40,11 @@ module CWG
41
40
  unless(title.include?('VIDEO:') ||
42
41
  title.include?('In pictures:') ||
43
42
  title.include?('Morning business round-up'))
44
- @rss_articles << Sanitize.clean(coder.decode(title)) + '. ' +
45
- Sanitize.clean(coder.decode(description))
43
+ clean_title = CWG::RSSClean.new(title).scrub
44
+ clean_desc = CWG::RSSClean.new(description).scrub
45
+ # @rss_articles << Sanitize.clean(coder.decode(title)) + '. ' +
46
+ # Sanitize.clean(coder.decode(description))
47
+ @rss_articles << clean_title + '. ' + clean_desc
46
48
  count += 1
47
49
  break if count >= article_count
48
50
  end
@@ -0,0 +1,41 @@
1
+ require 'oga'
2
+
3
+ module CWG
4
+ class RSSClean
5
+ def initialize(html_fragment)
6
+ @html_fragment = html_fragment
7
+ end
8
+
9
+ def scrub(options = {})
10
+ blacklisted_tags = NON_CONTENT_TAGS + options.fetch(:blacklist, [])
11
+
12
+ sanitize(Oga.parse_html(html_fragment).children, blacklisted_tags)
13
+ end
14
+
15
+ private
16
+
17
+ attr_reader :html_fragment
18
+
19
+ NON_CONTENT_TAGS = %w(script style)
20
+ WHITESPACE_CONTENT_TAGS = %w(address article aside blockquote br dd div dl dt footer h1 h2 h3 h4 h5 h6 header hgroup hr li nav ol p pre section ul)
21
+
22
+ def sanitize(node_set, blacklisted_tags)
23
+ node_set.reject { |node| !text?(node) && blacklisted_tags.include?(node.name) }
24
+ .flat_map { |node| [whitespace(node, :prefix), text(node, blacklisted_tags), whitespace(node, :suffix)] }.join
25
+ end
26
+
27
+ def text?(node)
28
+ node.is_a?(Oga::XML::Text)
29
+ end
30
+
31
+ def whitespace(node, _position)
32
+ return ' ' if !text?(node) && WHITESPACE_CONTENT_TAGS.include?(node.name)
33
+ ''
34
+ end
35
+
36
+ def text(node, blacklisted_tags)
37
+ return node.text if text?(node)
38
+ sanitize(node.children, blacklisted_tags)
39
+ end
40
+ end
41
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martyn Jago
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.7'
19
+ version: '2.8'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.7'
26
+ version: '2.8'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: httpclient
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -94,20 +94,6 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: 1.8.1
97
- - !ruby/object:Gem::Dependency
98
- name: sanitize
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: 4.4.0
104
- type: :runtime
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - "~>"
109
- - !ruby/object:Gem::Version
110
- version: 4.4.0
111
97
  - !ruby/object:Gem::Dependency
112
98
  name: wavefile
113
99
  requirement: !ruby/object:Gem::Requirement
@@ -310,6 +296,7 @@ files:
310
296
  - lib/cw/repeat_word.rb
311
297
  - lib/cw/reveal.rb
312
298
  - lib/cw/rss.rb
299
+ - lib/cw/rss_clean.rb
313
300
  - lib/cw/sentence.rb
314
301
  - lib/cw/speak.rb
315
302
  - lib/cw/spoken.rb