tenet 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d95fc0a698fc1275f6c1d3980fc06db6e67080e89cbf6fef3964c16bba7e99c4
4
+ data.tar.gz: 61a30e95a4804e3248b312b67b7d6b6071e91b27d512ba3b827c679c05719868
5
+ SHA512:
6
+ metadata.gz: 45a24af30c860b04ea37b71e7bc2495f37d501b7c3113a54754564d637bf0cdb616951439e23b6967793040927324abaab0d25c8e2a8c0404cbca476c6e1b7bd
7
+ data.tar.gz: ce5123dd2816bbef874509d33fa816f72c23292e44ab540d0c9a170e12457d26a07cd1231bc06e32b86d7a3cdfd6f06d797a3acf136e27a29b07030a649ec52d
data/.DS_Store ADDED
Binary file
data/.rspec ADDED
@@ -0,0 +1,4 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
4
+ --require rspec-power_assert
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.0.3
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ group :development do
6
+ gem "rake", "~> 13.0"
7
+ gem "rspec", "~> 3.0"
8
+ gem "rspec-power_assert"
9
+ gem "webrick"
10
+ gem "yard"
11
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,38 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.5.0)
5
+ power_assert (1.1.7)
6
+ rake (13.0.6)
7
+ rspec (3.10.0)
8
+ rspec-core (~> 3.10.0)
9
+ rspec-expectations (~> 3.10.0)
10
+ rspec-mocks (~> 3.10.0)
11
+ rspec-core (3.10.1)
12
+ rspec-support (~> 3.10.0)
13
+ rspec-expectations (3.10.1)
14
+ diff-lcs (>= 1.2.0, < 2.0)
15
+ rspec-support (~> 3.10.0)
16
+ rspec-mocks (3.10.2)
17
+ diff-lcs (>= 1.2.0, < 2.0)
18
+ rspec-support (~> 3.10.0)
19
+ rspec-power_assert (1.1.0)
20
+ power_assert (~> 1.1.0)
21
+ rspec (>= 2.14)
22
+ rspec-support (3.10.3)
23
+ webrick (1.7.0)
24
+ yard (0.9.27)
25
+ webrick (~> 1.7.0)
26
+
27
+ PLATFORMS
28
+ ruby
29
+
30
+ DEPENDENCIES
31
+ rake (~> 13.0)
32
+ rspec (~> 3.0)
33
+ rspec-power_assert
34
+ webrick
35
+ yard
36
+
37
+ BUNDLED WITH
38
+ 2.2.32
data/README.md ADDED
@@ -0,0 +1,69 @@
1
+ # Tenet
2
+
3
+ Tenet turns rendered HTML elements back into an Array of Hashes.
4
+ It provides an API for mapping HTML scraping rules to data attributes.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ ```ruby
11
+ gem 'tenet'
12
+ ```
13
+
14
+ And then execute:
15
+
16
+ $ bundle install
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install tenet
21
+
22
+ ## Usage
23
+
24
+ Import from an index page:
25
+
26
+ ```ruby
27
+ require "tenet"
28
+
29
+ class Events < Tenet::IndexPage
30
+ url "https://example.com/events"
31
+
32
+ row ".event"
33
+
34
+ attribute :name, css: ".event__title", type: :string
35
+ attribute :link, css: ".event__link", type: :link
36
+ attribute :thumbnail, css: ".event__thumbnail", type: :img
37
+ attribute :status, css: ".event__status", type: :string
38
+ end
39
+
40
+ Events.new.to_a
41
+ ```
42
+
43
+ Import from multiple show pages:
44
+
45
+ ```ruby
46
+
47
+ require "tenet"
48
+
49
+ class Events < Tenet::MultiPage
50
+ urls [
51
+ "https://example.com/events/1",
52
+ "https://example.com/events/2",
53
+ "https://example.com/events/3"
54
+ ]
55
+
56
+ attribute :name, css: ".event__title", type: :string
57
+ attribute :link, css: ".event__link", type: :link
58
+ attribute :thumbnail, css: ".event__thumbnail", type: :img
59
+ attribute :status, css: ".event__status", type: :string
60
+ end
61
+
62
+ Events.new.to_a
63
+ ```
64
+
65
+ ## Development
66
+
67
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
68
+
69
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+ require "yard"
6
+
7
# Test task, also wired up as the default so a bare `rake` runs the specs.
RSpec::Core::RakeTask.new(:spec)
task default: :spec

# Documentation task covering every Ruby file under lib/.
YARD::Rake::YardocTask.new { |yardoc| yardoc.files = ['lib/**/*.rb'] }
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "tenet"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require "irb"
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,19 @@
1
module Tenet
  # One extraction rule: a result name, the CSS selector to look up, and the
  # kind of value to read from the matched element.
  class Attribute
    # Value kinds accepted by {#initialize}. Frozen so callers cannot widen the
    # whitelist by mutating the constant.
    TYPE_OPTIONS = %i[string img link].freeze

    attr_accessor :name, :css, :type

    #
    # @param [Symbol] name
    # @param [String] css
    # @param [Symbol] type one of {TYPE_OPTIONS}
    # @raise [ArgumentError] when +type+ is not listed in {TYPE_OPTIONS}
    #
    def initialize(name, css:, type: :string)
      unless TYPE_OPTIONS.include?(type)
        raise ArgumentError, "unknown type #{type.inspect}; expected one of #{TYPE_OPTIONS.inspect}"
      end

      @name = name
      @css = css
      @type = type
    end
  end
end
@@ -0,0 +1,48 @@
1
module Tenet
  # Base class for scraping a single listing page: every element matching the
  # row selector becomes one Hash in the result.
  #
  # The configuration (url, row selector, attributes) is stored per class, so
  # separate subclasses do not overwrite or extend each other's settings. A
  # subclass starts with a copy of its parent's configuration.
  class IndexPage
    class << self
      #
      # Sets the page URL when an argument is given; always returns the
      # current value ("" when none has been set).
      #
      # @param [String] url
      # @return [String]
      #
      def url(url = nil)
        @url = url unless url.nil?
        @url || ""
      end

      #
      # Sets the row selector when an argument is given; always returns the
      # current value ("" when none has been set).
      #
      # @param [String] css
      # @return [String]
      #
      def row(css = nil)
        @row = css unless css.nil?
        @row || ""
      end

      #
      # Registers one attribute to extract from each row.
      #
      # @param [Symbol] name
      # @param [String] css
      # @param [Symbol] type
      # @return [Array<Attribute>] the attributes registered so far
      #
      def attribute(name, css:, type: :string)
        attributes << Attribute.new(name, css: css, type: type)
      end

      #
      # @return [Array<Attribute>] attributes registered on this class
      #
      def attributes
        @attributes ||= []
      end

      private

      # Hands the subclass its own copy of the configuration so later changes
      # on either side stay isolated.
      def inherited(subclass)
        super
        subclass.instance_variable_set(:@url, @url)
        subclass.instance_variable_set(:@row, @row)
        subclass.instance_variable_set(:@attributes, attributes.dup)
      end
    end

    # Fetches the configured page and converts every matching row to a Hash.
    def initialize
      definition = self.class
      rows = Page.new(definition.url).document.css(definition.row)

      @collection = rows.map do |row_document|
        Row.new(row_document.to_html, definition.attributes).to_h
      end
    end

    #
    # @return [Array<Hash>]
    #
    def to_a
      @collection
    end
  end
end
@@ -0,0 +1,41 @@
1
module Tenet
  # Base class for scraping several detail pages: each URL is fetched and
  # turned into one Hash in the result.
  #
  # The configuration (urls, attributes) is stored per class, so separate
  # subclasses do not overwrite or extend each other's settings. A subclass
  # starts with a copy of its parent's configuration.
  class MultiPages
    class << self
      #
      # Sets the list of page URLs when an argument is given; always returns
      # the current list (empty when none has been set).
      #
      # @param [Array] urls
      # @return [Array]
      #
      def urls(urls = nil)
        @urls = urls unless urls.nil?
        @urls ||= []
      end

      #
      # Registers one attribute to extract from each page.
      #
      # @param [Symbol] name
      # @param [String] css
      # @param [Symbol] type
      # @return [Array<Attribute>] the attributes registered so far
      #
      def attribute(name, css:, type: :string)
        attributes << Attribute.new(name, css: css, type: type)
      end

      #
      # @return [Array<Attribute>] attributes registered on this class
      #
      def attributes
        @attributes ||= []
      end

      private

      # Hands the subclass its own copy of the configuration so later changes
      # on either side stay isolated.
      def inherited(subclass)
        super
        subclass.instance_variable_set(:@urls, urls.dup)
        subclass.instance_variable_set(:@attributes, attributes.dup)
      end
    end

    # Fetches every configured URL and converts each page to a Hash.
    def initialize
      definition = self.class

      @collection = definition.urls.map do |url|
        Row.new(Page.new(url).html, definition.attributes).to_h
      end
    end

    #
    # @return [Array<Hash>]
    #
    def to_a
      @collection
    end
  end
end
data/lib/tenet/page.rb ADDED
@@ -0,0 +1,17 @@
1
+ require "nokogiri"
2
+ require "open-uri"
3
+
4
module Tenet
  # Downloads one document and keeps both its raw markup and its parsed form.
  class Page
    attr_accessor :html, :document

    #
    # @param [String] url location handed to URI.open
    # @param [#parse] parser object whose +parse+ turns the markup into a
    #   document; Nokogiri::HTML5 unless another one is supplied
    #
    def initialize(url, parser = Nokogiri::HTML5)
      # Read through the generic IO interface instead of StringIO#string:
      # open-uri returns a StringIO only for small bodies and switches to a
      # Tempfile for larger ones, which has no #string. The block form also
      # closes the handle as soon as the body has been read.
      @html = URI.open(url, &:read)
      @document = parser.parse(@html)
    end
  end
end
data/lib/tenet/row.rb ADDED
@@ -0,0 +1,39 @@
1
+ require "nokogiri"
2
+
3
module Tenet
  # Extracts the configured attributes from one fragment of markup.
  class Row
    #
    # @param [String] html markup to extract from
    # @param [Array<Attribute>] attributes extraction rules; each must respond
    #   to +name+, +css+ and +type+
    # @param [#parse] parser object whose +parse+ turns the markup into a
    #   document; Nokogiri::HTML5 unless another one is supplied
    #
    def initialize(html, attributes, parser = Nokogiri::HTML5)
      @html = html
      @attributes = attributes
      @document = parser.parse(@html)
    end

    #
    # Builds one Hash keyed by attribute name. An attribute whose selector
    # matches nothing maps to nil instead of raising.
    #
    # @return [Hash]
    #
    def to_h
      @attributes.each_with_object({}) do |attribute, values|
        node = @document.css(attribute.css).first
        values[attribute.name] = extract(node, attribute.type)
      end
    end

    private

    # Reads the value that corresponds to +type+ from a matched node.
    # Returns nil when there is no node or the type is not recognised.
    def extract(node, type)
      return if node.nil?

      case type
      when :string then node.content
      when :link then node[:href]
      when :img then node[:src]
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Tenet
  # Release number of the gem. Frozen explicitly because this file carries no
  # frozen_string_literal magic comment.
  VERSION = "1.0.0".freeze
end
data/lib/tenet.rb ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
# Load every Ruby file below lib/tenet. The list is sorted explicitly so the
# load order is the same on every platform; Dir.glob only guarantees sorted
# results from Ruby 3.0 on.
Dir.glob(File.join(__dir__, "tenet", "**", "*.rb")).sort.each { |file| require_relative file }

module Tenet
end
data/tenet.gemspec ADDED
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/tenet/version"
4
+
5
Gem::Specification.new do |spec|
  spec.name = "tenet"
  spec.version = Tenet::VERSION
  spec.authors = ["moekidev"]
  spec.email = ["me@moeki.dev"]
  spec.license = "MIT"

  spec.summary = "Tenet restores HTML elements views to Array of Hash data."
  spec.description = <<~EOF
    Tenet restores HTML elements views to Array of Hash data.
    It provides a API for mapping to HTML scraping rules and
    data attributes.
  EOF
  spec.homepage = "https://github.com/moekidev/tenet"
  spec.required_ruby_version = ">= 2.6.0"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/moekidev/tenet"

  # Package what git tracks, minus the gemspec itself, test/CI files, and
  # macOS Finder metadata (.DS_Store) that may have been committed by accident.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject do |f|
      (f == __FILE__) ||
        File.basename(f) == ".DS_Store" ||
        f.match(%r{\A(?:(?:test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
    end
  end
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "nokogiri", "~> 1.0"
end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tenet
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - moekidev
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-12-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ description: |
28
+ Tenet restores HTML elements views to Array of Hash data.
29
+ It provides a API for mapping to HTML scraping rules and
30
+ data attributes.
31
+ email:
32
+ - me@moeki.dev
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - ".DS_Store"
38
+ - ".rspec"
39
+ - ".ruby-version"
40
+ - Gemfile
41
+ - Gemfile.lock
42
+ - README.md
43
+ - Rakefile
44
+ - bin/console
45
+ - bin/setup
46
+ - lib/tenet.rb
47
+ - lib/tenet/attribute.rb
48
+ - lib/tenet/index_page.rb
49
+ - lib/tenet/multi_pages.rb
50
+ - lib/tenet/page.rb
51
+ - lib/tenet/row.rb
52
+ - lib/tenet/version.rb
53
+ - tenet.gemspec
54
+ homepage: https://github.com/moekidev/tenet
55
+ licenses:
56
+ - MIT
57
+ metadata:
58
+ homepage_uri: https://github.com/moekidev/tenet
59
+ source_code_uri: https://github.com/moekidev/tenet
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: 2.6.0
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ requirements: []
75
+ rubygems_version: 3.2.32
76
+ signing_key:
77
+ specification_version: 4
78
+ summary: Tenet restores HTML elements views to Array of Hash data.
79
+ test_files: []