lac 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/bin/lac +6 -0
- data/lib/lac.rb +9 -0
- data/lib/lac/application.rb +98 -0
- data/lib/lac/base.rb +4 -0
- data/lib/lac/base_model.rb +77 -0
- data/lib/lac/cli.rb +96 -0
- data/lib/lac/page.rb +77 -0
- data/views/base.html.haml +19 -0
- data/views/home.html.haml +1 -0
- data/views/model.html.haml +18 -0
- data/views/models.html.haml +15 -0
- data/views/scrapers.html.haml +11 -0
- metadata +140 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ca244cdc01499a1bae292429eafed751e4555e0a
|
4
|
+
data.tar.gz: f44e2769c311b844e0d12656fc2697b0a6eba6ee
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c71bdd27d36a4f374ab624b12f0c40c6ae7a91a6fa8815fd13691ffa597b8a4c967f0ab3e4a46e26afb7b23e4bbc9834f148ab92cc76cc27681a2f59d347387f
|
7
|
+
data.tar.gz: 7a80d9f99c59034a2c647da55cd7e34205d0dd94b56f4bc9e92c7c8987ff22dac4546f0e334e6decda4b53231dc1bc7ac560cf860f32f200d59582d5ed187a1e
|
data/Gemfile
ADDED
data/bin/lac
ADDED
data/lib/lac.rb
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'haml'
|
3
|
+
|
4
|
+
require 'active_support/all'
|
5
|
+
|
6
|
+
require 'open-uri'
|
7
|
+
require 'uri'
|
8
|
+
require 'csv'
|
9
|
+
|
10
|
+
require 'lac'
|
11
|
+
require 'sinatra'
|
12
|
+
require "sinatra/base"
|
13
|
+
|
14
|
+
module Lac
  # Sinatra application that serves the Lac dashboard.
  #
  # While this class body is evaluated, every file under ./models, ./helpers
  # and ./scrapers (relative to the working directory) is required. Models and
  # scrapers are resolved to constants from their file names and collected in
  # the class variables @@models and @@scrapers, which the routes and the
  # views read. Each scraper's +data+ method is also called once at load time.
  class Application < Sinatra::Base
    # Renders views/<file>.html.haml from the installed "lac" gem and wraps the
    # result in the shared views/base.html.haml layout.
    #
    # file   - view name without the ".html.haml" extension.
    # params - extra locals handed to the view; they take precedence over the
    #          default +models+ and +scrapers+ locals.
    #
    # Returns the rendered HTML as a String.
    def render_file(file, params = {})
      views_dir = File.join(Gem.loaded_specs["lac"].gem_dir, "views")
      shared_locals = { models: @@models, scrapers: @@scrapers }

      view = File.read(File.join(views_dir, "#{file}.html.haml"))
      content = Haml::Engine.new(view).render(Object.new, shared_locals.merge(params))

      layout = File.read(File.join(views_dir, "base.html.haml"))
      Haml::Engine.new(layout).render(Object.new, shared_locals.merge(content: content))
    end

    # Returns the class in +collection+ whose underscored name equals +name+,
    # or nil when no entry matches.
    def find_by_name(collection, name)
      collection.find { |klass| klass.name.underscore == name }
    end

    # Maps a source path such as "./models/blog_post.rb" to the constant the
    # file is expected to define (BlogPost).
    def self.constant_for(path)
      Object.const_get(File.basename(path).split(".").first.camelize)
    end

    @@scrapers = []
    @@models = []

    Dir["./models/*"].each do |filename|
      require filename
      puts filename
      model = constant_for(filename)
      @@models << model
      puts model
    end

    Dir["./helpers/*"].each do |filename|
      require filename
      puts filename
    end

    Dir["./scrapers/*"].each do |filename|
      require filename
      puts filename
      scraper = constant_for(filename)
      @@scrapers << scraper
      # Calls the scraper's data method once while the application loads.
      scraper.data
    end

    ## System routes

    get '/' do
      render_file("home")
    end

    get '/models' do
      render_file("models")
    end

    get '/scrapers' do
      render_file("scrapers")
    end

    # Calls the named scraper's data method, then shows the scraper list again.
    get '/scrapers/:name/scrape' do
      scraper = find_by_name(@@scrapers, params[:name])
      halt 404 unless scraper

      scraper.data
      render_file("scrapers")
    end

    # Exports all stored records of a model as a semicolon-separated CSV
    # download. The first row is the header: "id" followed by the field names.
    get '/models/:name/csv' do
      model = find_by_name(@@models, params[:name])
      halt 404 unless model

      content_type 'application/csv'
      attachment "myfilename.csv"

      CSV.generate(col_sep: ";") do |csv|
        csv << ["id"] + model.fields
        model.all.each do |id, obj|
          csv << [id] + model.fields.map { |field| obj[field] }
        end
      end
    end

    get '/models/:name' do
      model = find_by_name(@@models, params[:name])
      halt 404 unless model

      render_file("model", { model: model })
    end

    puts "Loaded Models #{@@models}"
  end
end
|
data/lib/lac/base.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'digest'
require 'fileutils'
require 'yaml'
|
2
|
+
module Lac
  # Minimal model persisted to one YAML file per class.
  #
  # Subclasses declare attributes with +field+ and name the identifying field
  # by overriding the +primary_key+ instance method. Records are stored in
  # "<ClassName>.yml" in the working directory, under the :collection key,
  # indexed by the record id (see +id+).
  class BaseModel
    class << self
      # Declares +symbol+ as a field of this class and defines its accessor.
      # Fields are tracked per class, so sibling models do not see each
      # other's fields.
      def field(symbol)
        own_fields.push(symbol) unless own_fields.include?(symbol)
        attr_accessor symbol
      end

      # Returns the fields declared on this class and on its model ancestors,
      # ancestors first, as a new Array.
      def fields
        inherited = superclass.respond_to?(:fields) ? superclass.fields : []
        (inherited + own_fields).uniq
      end

      # Returns the primary key reported by a fresh instance of this class.
      def primary_key
        new.primary_key
      end

      # Name of the YAML file that stores this class's records.
      def storage_file
        "#{name}.yml"
      end

      # Returns the stored records as a Hash of id => attribute Hash.
      # Returns an empty Array when the file is missing, cannot be read or
      # parsed, or holds no :collection entry.
      def all
        stored = YAML.load_file(storage_file)
        records = stored.is_a?(Hash) ? stored[:collection] : nil
        records || []
      rescue StandardError
        # Best effort by design: a store that cannot be loaded counts as empty.
        []
      end

      private

      # Fields declared directly on this class (class-level instance variable).
      def own_fields
        @own_fields ||= []
      end
    end

    def initialize
    end

    # Field that identifies a record. Subclasses override this to return a
    # field name; nil means the model has no primary key.
    def primary_key
      nil
    end

    # True when the model has no primary key, or when the primary key field
    # holds a present (non-blank) value.
    def validates?
      return true unless primary_key

      method(primary_key).call.present?
    end

    # Returns a Hash of field name => current value for every declared field.
    def to_hash
      fields.each_with_object({}) do |field, hash|
        hash[field] = method(field).call if field
      end
    end

    # Fields declared for this record's class.
    def fields
      self.class.fields
    end

    # Writes this record into the class's YAML file under its id, replacing
    # any record stored under the same id. The file is created when missing,
    # even if nothing is written.
    #
    # Returns the number of bytes written, or nil when the record has no id.
    def save
      filename = self.class.storage_file
      FileUtils.touch(filename)

      record_id = id
      return unless record_id

      # An empty file loads as a falsy value; start from an empty store then.
      store = YAML.load_file(filename) || {}
      store[:collection] ||= {}
      store[:collection][record_id.to_sym] = to_hash
      File.write(filename, store.to_yaml)
    end

    # SHA-256 hex digest of the primary key value, or nil when the model has
    # no primary key or the value is blank.
    def id
      return unless primary_key && validates?

      Digest::SHA256.hexdigest(method(primary_key).call.to_s)
    end
  end
end
|
data/lib/lac/cli.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'colorize'
|
2
|
+
require 'readline'
|
3
|
+
require 'lac'
|
4
|
+
require 'nokogiri'
|
5
|
+
# require './application.rb'
|
6
|
+
|
7
|
+
|
8
|
+
module Lac
  # Command line front end for Lac.
  #
  # Supported commands: "new" (interactively scaffold a project directory)
  # and "server" (run the Lac::Application web server).
  class CLI
    # Sub-directories created inside every new project.
    PROJECT_DIRECTORIES = %w[config cache models scrapers helpers].freeze

    # arguments - command line arguments; only the first one, the command
    #             name, is used.
    def initialize(arguments)
      @action = arguments[0]
    end

    # Runs the requested command, then says goodbye and exits the process.
    def start
      case @action
      when "new"
        create_project
      when "server"
        start_server
      else
        puts "Invalid command, exiting".red.on_white.bold
        puts "Valid commands are 'new' and 'server'".green.on_white.bold
      end
      exit_cli
    end

    def start_server
      Lac::Application.run!
    end

    # Asks for a project name and creates the project skeleton in the current
    # directory: the project folder, its sub-directories, a Gemfile and an
    # empty config/lac.rb.
    def create_project
      puts "Hello! Welcome to Lac".bold
      puts "This is a work in progress. Check out the Git repo if you would like to contribute.".bold
      puts "Type 'exit' to quit"

      project_name = prompt_project_name
      create_directories(project_name)

      File.open("./#{project_name}/Gemfile", "w") do |f|
        f.puts "source 'https://rubygems.org'"
        f.puts "gem 'lac'"
      end
      puts "#{"[Success]".green.bold} created file!\t\t ./#{project_name}/Gemfile"

      File.open("./#{project_name}/config/lac.rb", "w") do |f|
        f.puts ""
      end
      puts "#{"[Success]".green.bold} created file!\t\t ./#{project_name}/config/lac.rb"
    end

    # Reads one line from the user. Typing "exit" (any case) or closing the
    # input stream (Readline returns nil) quits the CLI.
    #
    # Returns the stripped input String.
    def prompt_input(message)
      line = Readline.readline(message, true)
      exit_cli if line.nil?

      input = line.strip
      exit_cli if input.downcase == "exit"
      input
    end

    # Prints the goodbye message and terminates the process.
    def exit_cli
      puts "#{"\nCya!".yellow.on_black.bold}\n"
      exit
    end

    private

    # Keeps prompting until the user picks a name that is not an existing
    # directory.
    def prompt_project_name
      loop do
        input = prompt_input("Choose a name for your project: ")
        return input unless File.directory?("./#{input}")

        puts "A folder with that name already exists, choose another name:"
      end
    end

    # Creates the project folder and its sub-directories. Dir.mkdir is used
    # instead of a shell `mkdir` so the user-supplied name is never
    # interpreted by a shell. Exits the CLI when a directory cannot be created.
    def create_directories(project_name)
      ["", *PROJECT_DIRECTORIES].each do |subdir|
        path = "./#{project_name}/#{subdir}"
        Dir.mkdir(path)
        puts "#{"[Success]".green.bold} created directory!\t #{path}"
      end
    rescue SystemCallError
      puts "Something went wrong. Exiting...".red.bold
      exit_cli
    end
  end
end
|
94
|
+
|
95
|
+
|
96
|
+
# puts Lac::CLI.new.start
|
data/lib/lac/page.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Lac
  # Wraps a parsed HTML document or node (anything that responds to +css+) and
  # offers nil-tolerant lookups plus a cached page fetcher.
  class Page
    # Directory, relative to the working directory, holding cached pages.
    CACHE_DIR = "cache".freeze

    attr_accessor :html

    def initialize(html: nil)
      self.html = html
    end

    # Fetches the body of +url+, caching it in CACHE_DIR under the SHA-256 hex
    # digest of the URL.
    #
    # url        - URL String; only schemes that open-uri can open are accepted.
    # from_cache - when true (default) an existing cache file is returned
    #              without touching the network; when false the page is
    #              fetched again and the cache file is rewritten.
    #
    # Returns the page body as a String.
    # Raises ArgumentError or URI::InvalidURIError when +url+ cannot be opened
    # as a URL.
    def self.get_page(url, from_cache = true)
      filename = "#{CACHE_DIR}/#{Digest::SHA256.hexdigest(url)}"

      if from_cache && File.file?(filename)
        result = File.read(filename)
        puts "Gotten #{filename} from cache"
      else
        result = fetch(url)
        Dir.mkdir(CACHE_DIR) unless File.directory?(CACHE_DIR)
        File.write(filename, result)
        puts "Written cache file #{filename}"
      end

      result
    end

    # Downloads +url+ via open-uri. The URL is parsed first and only opened
    # when the resulting URI object is openable, so local paths and
    # "| command" strings never reach Kernel#open.
    def self.fetch(url)
      uri = URI.parse(url)
      raise ArgumentError, "unsupported URL: #{url}" unless uri.respond_to?(:open)

      uri.open(&:read)
    end
    private_class_method :fetch

    # helpers for seamless initialisation no matter what starting point

    # Builds a page by fetching +url+ (through the cache) and parsing it.
    def self.by_url(url)
      new(html: Nokogiri::HTML(get_page(url)))
    end

    # Builds a page by parsing an HTML String.
    def self.by_html_string(html_string)
      new(html: Nokogiri::HTML(html_string))
    end

    # Builds a page around an already parsed document or node.
    def self.by_html(html)
      new(html: html)
    end

    # Returns the first element matching +css+, or nil when nothing matches.
    def try_css(css)
      html.css(css).first
    end

    # Returns attribute +attr+ of the first element matching +css+, or nil
    # when nothing matches.
    def try_css_attr(css, attr)
      try_css(css)&.attr(attr)
    end

    # Returns attribute +attr+ of the parent of the first element matching
    # +css+, or nil when nothing matches or the element has no parent.
    def try_css_parent_attr(css, attr)
      try_css(css)&.parent&.attr(attr)
    end

    # Returns one Page per element matching +selector+.
    def collection_by_selector(selector)
      html.css(selector).map { |item| Lac::Page.by_html(item) }
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
-# Shared layout. Expects the locals `scrapers`, `models` and `content`
-# (the already rendered inner view).
!!!
%html
  %head
    %meta{:content => "text/html; charset=UTF-8", "http-equiv" => "Content-Type"}/
    %title Lac
    :css
      td, th {
        padding: 1em;
        background-color: #efefff;
      }
      th {
        background-color: #aeaeaf;
      }
  %body
    %h1 Lac
    -# Navigation: each link sits in its own list item so the list is valid HTML.
    %ul
      %li
        %a{href: "/scrapers"}= "Scrapers (#{scrapers.count})"
      %li
        %a{href: "/models"}= "Models (#{models.count})"
    = content
|
@@ -0,0 +1 @@
|
|
1
|
+
-# Home view: renders a single empty paragraph.
%p
|
@@ -0,0 +1,18 @@
|
|
1
|
+
-# Detail view for one model. Expects the local `model` (a model class that
-# responds to name, primary_key, fields and all).
%h1= "Model #{model.name}"

%table
  %tbody
    -# Cells are wrapped in a row so the table markup is valid HTML.
    %tr
      %th Primary Key
      %td= model.primary_key
%a{href: "/models/#{model.name.underscore}/csv"} Export to CSV
%table
  %thead
    %tr
      %th ID
      - model.fields.each do |field|
        %th= field
  %tbody
    - model.all.each do |id, obj|
      %tr
        %td= id
        - model.fields.each do |field|
          %td= obj[field.to_sym]
|
metadata
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: lac
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Rene van Pelt
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-12-31 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: fileutils
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: colorize
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: activesupport
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.4'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.4'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sinatra
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: haml
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description:
|
98
|
+
email:
|
99
|
+
executables:
|
100
|
+
- lac
|
101
|
+
extensions: []
|
102
|
+
extra_rdoc_files: []
|
103
|
+
files:
|
104
|
+
- Gemfile
|
105
|
+
- bin/lac
|
106
|
+
- lib/lac.rb
|
107
|
+
- lib/lac/application.rb
|
108
|
+
- lib/lac/base.rb
|
109
|
+
- lib/lac/base_model.rb
|
110
|
+
- lib/lac/cli.rb
|
111
|
+
- lib/lac/page.rb
|
112
|
+
- views/base.html.haml
|
113
|
+
- views/home.html.haml
|
114
|
+
- views/model.html.haml
|
115
|
+
- views/models.html.haml
|
116
|
+
- views/scrapers.html.haml
|
117
|
+
homepage:
|
118
|
+
licenses: []
|
119
|
+
metadata: {}
|
120
|
+
post_install_message:
|
121
|
+
rdoc_options: []
|
122
|
+
require_paths:
|
123
|
+
- lib
|
124
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: '0'
|
129
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - ">="
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
requirements: []
|
135
|
+
rubyforge_project:
|
136
|
+
rubygems_version: 2.6.14
|
137
|
+
signing_key:
|
138
|
+
specification_version: 4
|
139
|
+
summary: lac is a DRY framework for creating scrapers
|
140
|
+
test_files: []
|