deba 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ad9e94cee53cfd87b42e07aa49768998c7426135
4
+ data.tar.gz: 65034ed3fe3b044c1537a785c7e9dcd5c8964303
5
+ SHA512:
6
+ metadata.gz: d6f759d4ea466c431aad03f62f65fa5e23ef14881d3ae4532497c1d9a2e869fd282807d434ddd44b5fe1e8b1e089afdac53bc3de62f660b9be896a9fb25aeb0d
7
+ data.tar.gz: 51a467840d948a621df946a1d8507c8cd0a3f902f4bd5f35079e014d5c5f1486a7ddd918137cf4b3c3fbbc0f8e484e8ad4dd5de88fb153b4fde915bb46ec94c1
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in deba.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2017 Brenton "B-Train" Fletcher
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # Deba
2
+
3
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/deba`. To experiment with that code, run `bin/console` for an interactive prompt.
4
+
5
+ TODO: Delete this and the text above, and describe your gem
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'deba'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install deba
22
+
23
+ ## Usage
24
+
25
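+ Pass an HTML string (or an already-parsed Nokogiri node) to `Deba.extract`; it returns the text content as a single string, with each heading prefixed by one `#` per heading level and every block followed by a blank line.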
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/deba.
36
+
37
+
38
+ ## License
39
+
40
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
41
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
require "bundler/gem_tasks"

# This Rakefile only loads Bundler's gem tasks and defines no :spec task, so a
# default task depending on :spec made a bare `rake` abort with an unknown-task
# error. Point the default at the gem build task instead.
task :default => :build
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "deba"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/deba.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'deba/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "deba"
8
+ spec.version = Deba::VERSION
9
+ spec.authors = ["Brenton \"B-Train\" Fletcher"]
10
+ spec.email = ["i@bloople.net"]
11
+
12
+ spec.summary = %q{Deba}
13
+ spec.description = %q{Deba}
14
+ spec.homepage = "http://example.com"
15
+ spec.license = "MIT"
16
+
17
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
18
+ f.match(%r{^(test|spec|features)/})
19
+ end
20
+ spec.bindir = "exe"
21
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
+ spec.require_paths = ["lib"]
23
+
24
+ spec.add_development_dependency "bundler", "~> 1.13"
25
+ spec.add_development_dependency "rake", "~> 10.0"
26
+ spec.add_dependency "nokogiri"
27
+ end
data/lib/deba/break.rb ADDED
@@ -0,0 +1,5 @@
1
+ class Deba::Break
2
+ def to_s
3
+ "\n"
4
+ end
5
+ end
@@ -0,0 +1,82 @@
1
module Deba
  # Walks a parsed document tree and collects its text into a flat list of
  # blocks (paragraphs and headings); #extract joins those blocks into one
  # string.
  #
  # Nodes only need to respond to #name, #text?, #inner_text and #children.
  class Extractor
    # Heading tags; the digits after the "h" are passed on as the heading level.
    HEADING_TAGS = %w(h1 h2 h3 h4 h5 h6).freeze

    # Tags that terminate the current block and start a new paragraph.
    BLOCK_INITIATING_TAGS = %w(article aside body blockquote dd dt header li nav ol p pre section td th ul).freeze

    # Inline emphasis tags mapped to the marker written before and after their
    # content. Both keys must be %w arrays: %(i em) would be the single String
    # "i em", which never equals a tag name, so <i>/<em> would lose their markers.
    ENHANCERS = { %w(b strong) => "*", %w(i em) => "_" }.freeze

    # Blocks collected by the most recent call to #extract.
    attr_reader :blocks

    # @param doc a parsed document (anything exposing its top element through
    #   #root) or a single node to use as the top of the tree
    def initialize(doc)
      @doc = doc
    end

    # Runs the extraction and returns the text of every collected block,
    # concatenated in document order.
    #
    # @return [String]
    def extract
      @blocks = []
      @just_appended_br = false
      @text_run = Deba::TextRunner.new(self)

      # A document hands out its top element via #root; a bare node has no such
      # method and is walked directly. A nil root (nothing parsed) yields "".
      root = @doc.respond_to?(:root) ? @doc.root : @doc
      process(root) unless root.nil?

      # Flush text that was not enclosed in any block-initiating or heading tag;
      # this is a no-op when the run holds no non-blank text.
      @text_run.break

      @blocks.map { |block| block.to_s }.join
    end

    # Processes one node and, recursively, its children, feeding text segments
    # into the current text run and splitting the run at block boundaries.
    #
    # @param node the node to process
    def process(node)
      node_name = node.name.downcase

      return if node_name == 'head'

      # Handle repeated brs by making a paragraph break
      if node_name == 'br'
        if @just_appended_br
          @just_appended_br = false
          @text_run.break
          return
        end

        # First br of a possible pair: remember it and decide on the next node.
        @just_appended_br = true
      elsif @just_appended_br
        # The previous br was not followed by another br, so it is a plain
        # line break within the current block.
        @just_appended_br = false
        @text_run << Deba::Break.new
      end

      if node.text?
        @text_run << node.inner_text if Deba::Utils.present?(node.inner_text)
        return
      end

      marker = enhancer_marker(node_name)
      if marker
        @text_run << marker
        process_children(node)
        @text_run << marker
        return
      end

      # These tags terminate the current paragraph, if present, and start a new paragraph
      if BLOCK_INITIATING_TAGS.include?(node_name)
        @text_run.break(Deba::Paragraph)
        process_children(node)
        @text_run.break
        return
      end

      if HEADING_TAGS.include?(node_name)
        @text_run.break(Deba::Heading, node_name[1..-1].to_i)
        process_children(node)
        @text_run.break
        return
      end

      # Pretend that the children of this node were siblings of this node (move them one level up the tree)
      process_children(node)
    end

    private

    # Returns the emphasis marker for +node_name+, or nil when the tag is not
    # listed in ENHANCERS.
    def enhancer_marker(node_name)
      ENHANCERS.each_pair do |tags, marker|
        return marker if tags.include?(node_name)
      end

      nil
    end

    # Processes every child of +node+ in order.
    def process_children(node)
      node.children.each { |child| process(child) }
    end
  end
end
@@ -0,0 +1,12 @@
1
module Deba
  # A heading block: a list of text segments plus a numeric level.
  class Heading
    attr_reader :segments, :level

    # @param segments the segments making up the heading text
    # @param level [Integer] how many "#" characters prefix the rendered heading
    def initialize(segments, level)
      @segments, @level = segments, level
    end

    # Renders the heading as +level+ "#" characters, a space, the stringified
    # segments, and a trailing blank line.
    #
    # @return [String]
    def to_s
      hashes = "#" * @level
      text = Deba::Stringifier.new(@segments).stringify

      "#{hashes} #{text}\n\n"
    end
  end
end
+ end
@@ -0,0 +1,11 @@
1
module Deba
  # A paragraph block: a list of text segments rendered as one paragraph.
  class Paragraph
    attr_reader :segments

    # @param segments the segments making up the paragraph text
    def initialize(segments)
      @segments = segments
    end

    # Renders the stringified segments followed by a blank line.
    #
    # @return [String]
    def to_s
      text = Deba::Stringifier.new(@segments).stringify

      "#{text}\n\n"
    end
  end
end
+ end
@@ -0,0 +1,17 @@
1
module Deba
  # Turns a list of segments (Strings and Deba::Break objects) into one string.
  class Stringifier
    # @param segments the segments to render
    def initialize(segments)
      @segments = segments
    end

    # Groups consecutive segments of the same class and renders each group:
    # adjacent Strings are concatenated and whitespace-normalised as a unit,
    # adjacent Breaks are rendered one after another. Segments of any other
    # class contribute nothing.
    #
    # @return [String]
    def stringify
      pieces = []

      @segments.chunk(&:class).each do |kind, run|
        if kind == String
          pieces << Deba::Utils.normalise(run.join)
        elsif kind == Deba::Break
          pieces << run.map { |piece| piece.to_s }.join
        end
      end

      pieces.join
    end
  end
end
+ end
@@ -0,0 +1,33 @@
1
module Deba
  # Accumulates text segments for the block currently being built and hands
  # finished blocks over to the extractor's block list.
  class TextRunner
    # @param extractor object whose #blocks list receives the finished blocks
    def initialize(extractor)
      @extractor = extractor

      start
    end

    # Appends a segment to the run in progress.
    def <<(segment)
      @segments << segment
    end

    # Closes the run in progress and opens a new one that will be built into
    # +block_type+ (with +param+ as extra constructor argument when given).
    def break(block_type = Deba::Paragraph, param = nil)
      finish
      start(block_type, param)
    end

    # Builds a block from the run in progress and appends it to the
    # extractor's blocks. Does nothing when the run holds no non-blank text.
    def finish
      return unless present?

      # The extra constructor argument is only passed when one was supplied.
      constructor_args = @param.nil? ? [@segments] : [@segments, @param]
      @extractor.blocks << @block_type.new(*constructor_args)
    end

    # Resets the run: empties the segment list and records which block type
    # (and optional parameter) the next finished block should use.
    def start(block_type = Deba::Paragraph, param = nil)
      @segments = []
      @block_type = block_type
      @param = param
    end

    # True when at least one String segment contains non-blank text.
    def present?
      @segments.any? do |segment|
        segment.is_a?(String) && Deba::Utils.present?(segment)
      end
    end
  end
end
+ end
data/lib/deba/utils.rb ADDED
@@ -0,0 +1,15 @@
1
module Deba
  # Stateless whitespace helpers for text segments.
  class Utils
    # Matches a string consisting solely of whitespace (or nothing at all).
    BLANK_RE = /\A[[:space:]]*\z/

    # Tells whether +text+ is nil, empty, or made up only of whitespace.
    # Always answers with true or false rather than the raw match position,
    # and treats nil as blank instead of raising.
    #
    # @param text [String, nil]
    # @return [Boolean]
    def self.blank?(text)
      text.nil? || text.empty? || !(text =~ BLANK_RE).nil?
    end

    # Opposite of .blank?: true when +text+ holds at least one
    # non-whitespace character.
    #
    # @param text [String, nil]
    # @return [Boolean]
    def self.present?(text)
      !blank?(text)
    end

    # Collapses every run of whitespace into a single space and strips
    # leading and trailing whitespace.
    #
    # @param text [String]
    # @return [String]
    def self.normalise(text)
      text.gsub(/[[:space:]]+/, ' ').strip
    end
  end
end
+ end
@@ -0,0 +1,3 @@
1
module Deba
  # Version of the gem; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
data/lib/deba.rb ADDED
@@ -0,0 +1,19 @@
1
+ require "nokogiri"
2
+
3
+ module Deba
4
+ end
5
+
6
+ require "deba/version"
7
+ require "deba/utils"
8
+ require "deba/stringifier"
9
+ require "deba/break"
10
+ require "deba/heading"
11
+ require "deba/paragraph"
12
+ require "deba/text_runner"
13
+ require "deba/extractor"
14
+
15
module Deba
  # Extracts the text of +html+ and returns it as one string.
  #
  # @param html an HTML string, or an already-parsed Nokogiri::XML::Node,
  #   which is used as-is instead of being parsed again
  # @return the result of Deba::Extractor#extract for that document
  def self.extract(html)
    document =
      if html.is_a?(Nokogiri::XML::Node)
        html
      else
        Nokogiri::HTML(html)
      end

    Deba::Extractor.new(document).extract
  end
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: deba
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Brenton "B-Train" Fletcher
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-01-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.13'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Deba
56
+ email:
57
+ - i@bloople.net
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - Gemfile
64
+ - LICENSE.txt
65
+ - README.md
66
+ - Rakefile
67
+ - bin/console
68
+ - bin/setup
69
+ - deba.gemspec
70
+ - lib/deba.rb
71
+ - lib/deba/break.rb
72
+ - lib/deba/extractor.rb
73
+ - lib/deba/heading.rb
74
+ - lib/deba/paragraph.rb
75
+ - lib/deba/stringifier.rb
76
+ - lib/deba/text_runner.rb
77
+ - lib/deba/utils.rb
78
+ - lib/deba/version.rb
79
+ homepage: http://example.com
80
+ licenses:
81
+ - MIT
82
+ metadata: {}
83
+ post_install_message:
84
+ rdoc_options: []
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubyforge_project:
99
+ rubygems_version: 2.6.8
100
+ signing_key:
101
+ specification_version: 4
102
+ summary: Deba
103
+ test_files: []