tenet 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d95fc0a698fc1275f6c1d3980fc06db6e67080e89cbf6fef3964c16bba7e99c4
4
+ data.tar.gz: 61a30e95a4804e3248b312b67b7d6b6071e91b27d512ba3b827c679c05719868
5
+ SHA512:
6
+ metadata.gz: 45a24af30c860b04ea37b71e7bc2495f37d501b7c3113a54754564d637bf0cdb616951439e23b6967793040927324abaab0d25c8e2a8c0404cbca476c6e1b7bd
7
+ data.tar.gz: ce5123dd2816bbef874509d33fa816f72c23292e44ab540d0c9a170e12457d26a07cd1231bc06e32b86d7a3cdfd6f06d797a3acf136e27a29b07030a649ec52d
data/.DS_Store ADDED
Binary file
data/.rspec ADDED
@@ -0,0 +1,4 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
4
+ --require rspec-power_assert
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.0.3
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ group :development do
6
+ gem "rake", "~> 13.0"
7
+ gem "rspec", "~> 3.0"
8
+ gem "rspec-power_assert"
9
+ gem "webrick"
10
+ gem "yard"
11
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,38 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.5.0)
5
+ power_assert (1.1.7)
6
+ rake (13.0.6)
7
+ rspec (3.10.0)
8
+ rspec-core (~> 3.10.0)
9
+ rspec-expectations (~> 3.10.0)
10
+ rspec-mocks (~> 3.10.0)
11
+ rspec-core (3.10.1)
12
+ rspec-support (~> 3.10.0)
13
+ rspec-expectations (3.10.1)
14
+ diff-lcs (>= 1.2.0, < 2.0)
15
+ rspec-support (~> 3.10.0)
16
+ rspec-mocks (3.10.2)
17
+ diff-lcs (>= 1.2.0, < 2.0)
18
+ rspec-support (~> 3.10.0)
19
+ rspec-power_assert (1.1.0)
20
+ power_assert (~> 1.1.0)
21
+ rspec (>= 2.14)
22
+ rspec-support (3.10.3)
23
+ webrick (1.7.0)
24
+ yard (0.9.27)
25
+ webrick (~> 1.7.0)
26
+
27
+ PLATFORMS
28
+ ruby
29
+
30
+ DEPENDENCIES
31
+ rake (~> 13.0)
32
+ rspec (~> 3.0)
33
+ rspec-power_assert
34
+ webrick
35
+ yard
36
+
37
+ BUNDLED WITH
38
+ 2.2.32
data/README.md ADDED
@@ -0,0 +1,69 @@
1
+ # Tenet
2
+
3
+ Tenet restores rendered HTML element views back into an Array of Hashes.
4
+ It provides an API for mapping HTML scraping rules to data attributes.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ ```ruby
11
+ gem 'tenet'
12
+ ```
13
+
14
+ And then execute:
15
+
16
+ $ bundle install
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install tenet
21
+
22
+ ## Usage
23
+
24
+ Import from an index page:
25
+
26
+ ```ruby
27
+ require "tenet"
28
+
29
+ class Events < Tenet::IndexPage
30
+ url "https://example.com/events"
31
+
32
+ row ".event"
33
+
34
+ attribute :name, css: ".event__title", type: :string
35
+ attribute :link, css: ".event__link", type: :link
36
+ attribute :thumbnail, css: ".event__thumbnail", type: :img
37
+ attribute :status, css: ".event__status", type: :string
38
+ end
39
+
40
+ Events.new.to_a
41
+ ```
42
+
43
+ Import from multiple show pages:
44
+
45
+ ```ruby
46
+
47
+ require "tenet"
48
+
49
+ class Events < Tenet::MultiPages
50
+ urls [
51
+ "https://example.com/events/1",
52
+ "https://example.com/events/2",
53
+ "https://example.com/events/3"
54
+ ]
55
+
56
+ attribute :name, css: ".event__title", type: :string
57
+ attribute :link, css: ".event__link", type: :link
58
+ attribute :thumbnail, css: ".event__thumbnail", type: :img
59
+ attribute :status, css: ".event__status", type: :string
60
+ end
61
+
62
+ Events.new.to_a
63
+ ```
64
+
65
+ ## Development
66
+
67
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
68
+
69
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+ require "yard"
6
+
7
+ RSpec::Core::RakeTask.new(:spec)
8
+
9
+ task default: :spec
10
+
11
+ YARD::Rake::YardocTask.new do |t|
12
+ t.files = ['lib/**/*.rb']
13
+ end
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "tenet"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require "irb"
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,19 @@
module Tenet
  #
  # Describes one value to pull out of a scraped document: the key it is
  # stored under, the CSS selector that locates the element, and the kind of
  # value to read from that element.
  #
  class Attribute
    # Every extraction type accepted by {#initialize}. Frozen so the list of
    # valid types cannot be altered by accident at runtime.
    TYPE_OPTIONS = %i[string img link].freeze

    attr_accessor :name, :css, :type

    #
    # @param [Symbol] name key used for the extracted value
    # @param [String] css CSS selector locating the element
    # @param [Symbol] type one of {TYPE_OPTIONS}; defaults to :string
    #
    # @raise [ArgumentError] when type is not listed in {TYPE_OPTIONS}
    #
    def initialize(name, css:, type: :string)
      unless TYPE_OPTIONS.include?(type)
        raise ArgumentError,
              "unknown attribute type #{type.inspect} (expected one of #{TYPE_OPTIONS.inspect})"
      end

      @name = name
      @css = css
      @type = type
    end
  end
end
@@ -0,0 +1,48 @@
module Tenet
  #
  # Base class for scraping a single listing page. A subclass declares the
  # page URL, the CSS selector matching each row, and the attributes to read
  # from every row; instantiating the subclass fetches the page and builds
  # one Hash per matched row.
  #
  # The configuration is stored per class (class-level instance variables),
  # so two scraper classes never overwrite each other's URL, row selector or
  # attribute list. A class that has not configured a value itself falls back
  # to its superclass, so settings declared on a parent are still inherited.
  #
  class IndexPage
    class << self
      #
      # Declares the URL of the page to scrape.
      #
      # @param [String] url
      #
      def url(url)
        @url = url
      end

      #
      # Declares the CSS selector that matches one row on the page.
      #
      # @param [String] css
      #
      def row(css)
        @row = css
      end

      #
      # Declares one attribute to extract from every row.
      #
      # @param [Symbol] name
      # @param [String] css
      # @param [Symbol] type
      #
      def attribute(name, css:, type: :string)
        own_attributes << Attribute.new(name, css: css, type: type)
      end

      #
      # @return [String] URL declared on this class or the nearest ancestor,
      #   or "" when none was declared
      #
      def page_url
        return @url if instance_variable_defined?(:@url)

        superclass.respond_to?(:page_url) ? superclass.page_url : ""
      end

      #
      # @return [String] row selector declared on this class or the nearest
      #   ancestor, or "" when none was declared
      #
      def row_selector
        return @row if instance_variable_defined?(:@row)

        superclass.respond_to?(:row_selector) ? superclass.row_selector : ""
      end

      #
      # @return [Array<Attribute>] attributes declared on the ancestors
      #   followed by the ones declared on this class
      #
      def attributes
        inherited = superclass.respond_to?(:attributes) ? superclass.attributes : []
        inherited + own_attributes
      end

      private

      # Attributes declared directly on this class (not on ancestors).
      def own_attributes
        @attributes ||= []
      end
    end

    #
    # Fetches the configured page and converts every matched row to a Hash.
    #
    def initialize
      definitions = self.class.attributes
      rows = Page.new(self.class.page_url).document.css(self.class.row_selector)

      @collection = rows.map do |row_document|
        Row.new(row_document.to_html, definitions).to_h
      end
    end

    #
    # @return [Array<Hash>]
    #
    def to_a
      @collection
    end
  end
end
@@ -0,0 +1,41 @@
module Tenet
  #
  # Base class for scraping several pages that share one layout. A subclass
  # declares the list of URLs and the attributes to read; instantiating the
  # subclass fetches every URL and builds one Hash per page.
  #
  # The configuration is stored per class (class-level instance variables),
  # so two scraper classes never overwrite each other's URL list or append to
  # each other's attribute list. A class that has not configured a value
  # itself falls back to its superclass.
  #
  class MultiPages
    class << self
      #
      # Declares the URLs of the pages to scrape.
      #
      # @param [Array] urls
      #
      def urls(urls)
        @urls = urls
      end

      #
      # Declares one attribute to extract from every page.
      #
      # @param [Symbol] name
      # @param [String] css
      # @param [Symbol] type
      #
      def attribute(name, css:, type: :string)
        own_attributes << Attribute.new(name, css: css, type: type)
      end

      #
      # @return [Array] URLs declared on this class or the nearest ancestor,
      #   or an empty Array when none were declared
      #
      def page_urls
        return @urls if instance_variable_defined?(:@urls)

        superclass.respond_to?(:page_urls) ? superclass.page_urls : []
      end

      #
      # @return [Array<Attribute>] attributes declared on the ancestors
      #   followed by the ones declared on this class
      #
      def attributes
        inherited = superclass.respond_to?(:attributes) ? superclass.attributes : []
        inherited + own_attributes
      end

      private

      # Attributes declared directly on this class (not on ancestors).
      def own_attributes
        @attributes ||= []
      end
    end

    #
    # Fetches every configured URL and converts each page to a Hash.
    #
    def initialize
      definitions = self.class.attributes

      @collection = self.class.page_urls.map do |url|
        Row.new(Page.new(url).html, definitions).to_h
      end
    end

    #
    # @return [Array<Hash>]
    #
    def to_a
      @collection
    end
  end
end
data/lib/tenet/page.rb ADDED
@@ -0,0 +1,17 @@
1
+ require "nokogiri"
2
+ require "open-uri"
3
+
module Tenet
  #
  # Fetches a document and keeps both its raw markup and its parsed form.
  #
  class Page
    attr_accessor :html, :document

    #
    # @param [String] url location handed to URI.open
    # @param [#parse] parser object whose +parse+ turns the markup into a
    #   document; defaults to Nokogiri::HTML5
    #
    def initialize(url, parser = Nokogiri::HTML5)
      # Read through the block form instead of calling #string on the result:
      # open-uri hands back a StringIO only for small bodies and a Tempfile
      # for larger ones, and Tempfile has no #string. The block form also
      # closes the handle once the body has been read.
      #
      # NOTE(review): URI.open falls back to Kernel#open for strings that are
      # not URLs, so +url+ must not come from untrusted input.
      @html = URI.open(url, &:read)
      @document = parser.parse(@html)
    end
  end
end
data/lib/tenet/row.rb ADDED
@@ -0,0 +1,39 @@
1
+ require "nokogiri"
2
+
module Tenet
  #
  # Extracts a Hash of values from one fragment of markup, driven by a list
  # of attribute definitions.
  #
  class Row
    #
    # @param [String] html markup to parse
    # @param [Array<#name, #css, #type>] attributes definitions of the values
    #   to extract
    # @param [#parse] parser object whose +parse+ turns the markup into a
    #   document answering +css+; defaults to Nokogiri::HTML5
    #
    def initialize(html, attributes, parser = Nokogiri::HTML5)
      @html = html
      @attributes = attributes
      @document = parser.parse(@html)
    end

    #
    # Builds the result keyed by attribute name. Only the first element
    # matching an attribute's selector is used; an attribute whose selector
    # matches nothing maps to nil instead of raising.
    #
    # @return [Hash]
    #
    def to_h
      @attributes.each_with_object({}) do |attribute, values|
        node = @document.css(attribute.css).first
        values[attribute.name] = extract(node, attribute.type)
      end
    end

    private

    # Reads the value selected by +type+ from +node+: the text content for
    # :string, the href for :link, the src for :img. Returns nil when there
    # is no node or the type is not one of those three.
    def extract(node, type)
      return if node.nil?

      case type
      when :string then node.content
      when :link then node[:href]
      when :img then node[:src]
      end
    end
  end
end
@@ -0,0 +1,3 @@
module Tenet
  # Version of the gem. Frozen so the string cannot be mutated in place by
  # code that reads it.
  VERSION = "1.0.0".freeze
end
data/lib/tenet.rb ADDED
@@ -0,0 +1,6 @@
# frozen_string_literal: true

# Load every Ruby file below the tenet/ directory next to this file. The list
# is sorted so the load order is the same on every platform and Ruby version;
# Dir.glob alone does not guarantee an order on older Rubies.
Dir.glob(File.join(__dir__, "tenet", "**", "*.rb")).sort.each { |f| require_relative f }

# Top-level namespace of the gem.
module Tenet
end
data/tenet.gemspec ADDED
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/tenet/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "tenet"
7
+ spec.version = Tenet::VERSION
8
+ spec.authors = ["moekidev"]
9
+ spec.email = ["me@moeki.dev"]
10
+ spec.license = "MIT"
11
+
12
+ spec.summary = "Tenet restores HTML elements views to Array of Hash data."
13
+ spec.description = <<~EOF
14
+ Tenet restores HTML elements views to Array of Hash data.
15
+ It provides a API for mapping to HTML scraping rules and
16
+ data attributes.
17
+ EOF
18
+ spec.homepage = "https://github.com/moekidev/tenet"
19
+ spec.required_ruby_version = ">= 2.6.0"
20
+
21
+ spec.metadata["homepage_uri"] = spec.homepage
22
+ spec.metadata["source_code_uri"] = "https://github.com/moekidev/tenet"
23
+
24
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
25
+ `git ls-files -z`.split("\x0").reject do |f|
26
+ (f == __FILE__) || f.match(%r{\A(?:(?:test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
27
+ end
28
+ end
29
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
30
+ spec.require_paths = ["lib"]
31
+
32
+ spec.add_dependency "nokogiri", "~> 1.0"
33
+ end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tenet
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - moekidev
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-12-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ description: |
28
+ Tenet restores HTML elements views to Array of Hash data.
29
+ It provides a API for mapping to HTML scraping rules and
30
+ data attributes.
31
+ email:
32
+ - me@moeki.dev
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - ".DS_Store"
38
+ - ".rspec"
39
+ - ".ruby-version"
40
+ - Gemfile
41
+ - Gemfile.lock
42
+ - README.md
43
+ - Rakefile
44
+ - bin/console
45
+ - bin/setup
46
+ - lib/tenet.rb
47
+ - lib/tenet/attribute.rb
48
+ - lib/tenet/index_page.rb
49
+ - lib/tenet/multi_pages.rb
50
+ - lib/tenet/page.rb
51
+ - lib/tenet/row.rb
52
+ - lib/tenet/version.rb
53
+ - tenet.gemspec
54
+ homepage: https://github.com/moekidev/tenet
55
+ licenses:
56
+ - MIT
57
+ metadata:
58
+ homepage_uri: https://github.com/moekidev/tenet
59
+ source_code_uri: https://github.com/moekidev/tenet
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: 2.6.0
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ requirements: []
75
+ rubygems_version: 3.2.32
76
+ signing_key:
77
+ specification_version: 4
78
+ summary: Tenet restores HTML elements views to Array of Hash data.
79
+ test_files: []