alparser 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +7 -0
- data/Gemfile +3 -0
- data/Guardfile +9 -0
- data/LICENSE.txt +22 -0
- data/README.md +53 -0
- data/Rakefile +1 -0
- data/alparser.gemspec +31 -0
- data/lib/alparser/branc.rb +112 -0
- data/lib/alparser/climb.rb +5 -0
- data/lib/alparser/parser.rb +31 -0
- data/lib/alparser/version.rb +3 -0
- data/lib/alparser.rb +28 -0
- data/spec/alparser_spec.rb +56 -0
- data/spec/requests/kozjak_club_climbs.txt +926 -0
- data/spec/requests/kozjak_club_climbs_p1.txt +926 -0
- data/spec/requests/kozjak_club_climbs_p2.txt +926 -0
- data/spec/requests/kozjak_club_climbs_p3.txt +926 -0
- data/spec/requests/kozjak_club_climbs_p4.txt +926 -0
- data/spec/requests/kozjak_club_climbs_p5.txt +926 -0
- data/spec/requests/kozjak_club_climbs_p6.txt +926 -0
- data/spec/spec_helper.rb +35 -0
- metadata +204 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1e9f1dbc82696f98688236a9087e77df92e20796
|
4
|
+
data.tar.gz: fdcc16345076e0414c18af65ce58ea2c953dc48a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 65145f7068c78e2ee7778c23895c536c748f82daa69f3b1779604e9f2f08d44873ec7693f95441a535415ec6f80667e74f56ce981f679edfca0d75bb6d5557a4
|
7
|
+
data.tar.gz: aeaca3e514c9fccd1a87dcf49c36e706eb92afe8c19efd02bf72a198b772685148f9196be10f072a34a74a0cd73cd8611b72d329aaf1ec511ef9db957e7d6ae3
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
alparser
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-2.1.0
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Guardfile
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
# Guard configuration: re-runs RSpec when specs or library files change.
guard :rspec, all_on_start: false, all_after_pass: false, parallel: false do
  # A changed spec file is run as-is.
  watch(%r{^spec/.+_spec\.rb$})

  # A changed library file maps to a spec path derived from its name.
  watch(%r{^lib/(.+)\.rb$}) do |match|
    "spec/lib/#{match[1]}_spec.rb"
  end
  watch(%r{^lib/alparser/(.+)\.rb$}) do |match|
    "spec/alparser/#{match[1]}_spec.rb"
  end

  # Changes to either of these files trigger the whole spec directory.
  ['spec/spec_helper.rb', 'lib/alparser.rb'].each do |path|
    watch(path) { "spec" }
  end
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Oto Brglez
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# Alparser
|
2
|
+
|
3
|
+
[![Gem Version][fury-badge]][fury] [![Build Status][travis-badge]][travis]
|
4
|
+
|
5
|
+
Alparser is a parser for Slovenian mountaineering pages.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'alparser'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install alparser
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require "alparser"
|
25
|
+
|
26
|
+
page = Alparser.new "http://www.kozjak.org", parser: Alparser::Branc
|
27
|
+
puts page.club_climbs
|
28
|
+
```
|
29
|
+
|
30
|
+
## Supported pages
|
31
|
+
|
32
|
+
| Page | Parser | Implemented
|
33
|
+
| ------------------------------------------- | ------------------- | ------------
|
34
|
+
| [APD Kozjak](http://www.kozjak.org/) | Alparser::Branc | Yes
|
35
|
+
| [PD TAM](http://www.pdtam.org/) | Alparser::Branc | Yes
|
36
|
+
| [AO Celje](http://www.aocelje.org/) | Alparser::Branc | Yes
|
37
|
+
| [PD Grmada AO](http://ao.pdgrmada.org/) | Alparser::Branc | Yes
|
38
|
+
| [AlpHut](http://alphut.net/) | Alparser::Branc | Yes
|
39
|
+
| [AlpHut](http://alphut.net/) | Alparser::Branc | Yes
|
40
|
+
| [Plezanje.net](http://plezanje.net/) | Alparser::Plezanje | No
|
41
|
+
| [Alpinizem.info](http://www.alpinizem.info/)| Alparser::Alpinizem | No
|
42
|
+
|
43
|
+
## Why?
|
44
|
+
|
45
|
+
This is a highly experimental project. Its purpose is to collect mountaineering data from Slovenia. The project is released under the MIT license; feel free to use it.
|
46
|
+
|
47
|
+
- [Oto Brglez](http://otobrglez.opalab.com)
|
48
|
+
|
49
|
+
[fury-badge]: https://badge.fury.io/rb/alparser.png
|
50
|
+
[fury]: http://badge.fury.io/rb/alparser
|
51
|
+
[travis-badge]: https://secure.travis-ci.org/otobrglez/alparser.png?branch=master
|
52
|
+
[travis]: http://travis-ci.org/otobrglez/alparser
|
53
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/alparser.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'alparser/version'
|
5
|
+
|
6
|
+
# Gem metadata for alparser; the version comes from Alparser::VERSION.
Gem::Specification.new do |gem|
  gem.name          = "alparser"
  gem.version       = Alparser::VERSION
  gem.authors       = ["Oto Brglez"]
  gem.email         = ["otobrglez@gmail.com"]
  gem.description   = %q{Parser for Slovenian mountaineering pages}
  gem.summary       = %q{Alparser supports several most common Slovenian mountaineering pages.}
  gem.homepage      = ""
  gem.license       = "MIT"

  # Package whatever git tracks; executables and test files are derived from that list.
  tracked_files      = `git ls-files`.split($/)
  gem.files          = tracked_files
  gem.executables    = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files     = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths  = ["lib"]

  # Runtime dependencies.
  ["httparty", "nokogiri"].each do |dependency|
    gem.add_dependency dependency
  end

  # Development-only dependencies.
  gem.add_development_dependency "bundler", "~> 1.3"
  ["rake", "rspec", "webmock", "guard", "guard-rspec", "shoulda-matchers"].each do |dependency|
    gem.add_development_dependency dependency
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
require "date"

# Fetches the paginated climbs listing served at "/vzponi-vse.php" and turns
# every table row of the listing into an Alparser::Climb.
class Alparser::Branc
  include Alparser::Parser

  # Page count extracted from the most recently fetched listing page.
  attr_accessor :number_of_pages

  # Fetches a single page of the club climbs listing.
  #
  # @param page [Integer] page number sent as the "page" query parameter
  # @param leto [Integer, nil] value for the "leto" query parameter; the
  #   current year is used when nil
  # @return [Array<Alparser::Climb>] one entry per listing row
  def club_climbs page: 1, leto: nil
    response = self.class.get "/vzponi-vse.php", query: {
      page: page,
      # The original read an undefined local `year` here, so passing `leto:`
      # raised NameError.
      leto: (leto.nil? ? Time.now.year : leto)
    }

    handle_page_number response
    handle_climbs response
  end

  # Fetches page 1 and then every following page up to the page count
  # reported by page 1, concatenating the results.
  #
  # @param leto [Integer, nil] forwarded to #club_climbs for every page
  # @return [Array<Alparser::Climb>]
  def all_club_climbs leto: nil
    climbs = club_climbs(leto: leto)
    # Every club_climbs call overwrites @number_of_pages; keep the value
    # reported by the first page as the loop bound.
    last_page = @number_of_pages

    (2..last_page).each do |page|
      climbs += club_climbs(page: page, leto: leto)
    end

    climbs
  end

  # Maps every data row (all rows after the first) of the silver-background
  # table to a climb.
  #
  # @param response [#parsed_response] response whose parsed body answers to xpath
  # @return [Array<Alparser::Climb>]
  def handle_climbs response
    rows = response.parsed_response.xpath("//table[@bgcolor='silver']/tr[position()>1]")
    rows.map { |row| climb_form row }
  end

  # Reads the page count from the last "pagebox" link and stores it in
  # @number_of_pages. Falls back to 1 when no page box matches.
  #
  # @param response [#parsed_response]
  # @return [Integer] the stored page count
  def handle_page_number response
    last_box = response.parsed_response.xpath("//a[@class='pagebox']/span").to_a.last
    # nil.to_s is "", which simply does not match.
    match = /^\<span\>.(\d+)/.match(last_box.to_s)
    @number_of_pages = match ? match[1].to_i : 1
  end

  # Builds an Alparser::Climb (and its Alparser::User when a climber id is
  # present) from one table row.
  #
  # @param item [#xpath] a single <tr> node of the listing
  # @return [Alparser::Climb]
  # @raise [ArgumentError] when the first cell does not hold a parseable date
  def climb_form item
    date = Date.parse(item.xpath("./td[position()=1]").text)

    route_link = item.xpath("./td[position()=2]/a").first
    climber_links = item.xpath("./td[position()=6]/a")

    kind = item.xpath("./td[position()=8]").text
    kind = nil if kind == ""

    climb = Alparser::Climb.new(
      id: route_id(route_link),
      date: date,
      route: route_name(item),
      mountain_range: item.xpath("./td[position()=3]").text.strip,
      # Non-bang sub: sub! returns nil when there is no NBSP to remove, which
      # previously made the value fall back to the raw, unstripped cell text.
      country: item.xpath("./td[position()=4]").text.strip.sub(/\u00A0/, ""),
      country_id: country_code(item),
      grade: item.xpath("./td[position()=5]").text,
      conditions: item.xpath("./td[position()=7]").text,
      kind: kind,
      has_notes: item.xpath("./td[position()=2]/img[@src='ikone/opombe.gif']").size == 1,
      has_images: item.xpath("./td[position()=2]/img[@src='ikone/fotke.gif']").size == 1,
      user_name: climber_links.text.strip,
      user_id: member_id(climber_links.first)
    )

    climb.base_uri = @url

    unless climb.user_id.nil?
      climb.user = Alparser::User.new(
        id: climb.user_id,
        name: climb.user_name
      )
    end

    climb
  end

  private

  # Route name from the link in the second cell, or from its <b> element
  # when the link text is empty or the link is missing.
  def route_name item
    name = item.xpath("./td[position()=2]/a").text.strip
    # strip never returns nil, so the fallback has to test for emptiness.
    name.empty? ? item.xpath("./td[position()=2]//b").text.strip : name
  end

  # Numeric id taken from a trailing "pid=" (preferred) or "rid=" in the
  # link's href; nil when there is no link or neither parameter is present.
  def route_id link
    return nil if link.nil?

    href = link["href"].to_s
    match = /pid=(\d+)$/.match(href) || /rid=(\d+)$/.match(href)
    # Base 10 is explicit so digits with a leading zero are not read as octal.
    match && Integer(match[1], 10)
  end

  # Basename (without ".gif") of the image in the fourth cell, or nil when
  # the cell has no image or its src does not end in ".gif".
  def country_code item
    flag = item.xpath("./td[position()=4]/img").first
    return nil if flag.nil?

    flag["src"].to_s[/\/(\w+)\.gif$/, 1]
  end

  # Integer after "mem=" in the climber link's href, or nil when the row has
  # no climber link or the href carries no such parameter.
  def member_id link
    return nil if link.nil?

    match = /mem=(\d+)/.match(link["href"].to_s)
    match && match[1].to_i
  end

end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Mixin for parser classes: on inclusion it adds HTTParty to the receiver and
# registers HtmlParser as the receiver's HTTParty parser.
module Alparser::Parser

  # HTTParty parser with 'text/html' registered as the :html format.
  class HtmlParser < HTTParty::Parser
    SupportedFormats.merge!('text/html' => :html)

    # Parses the body with Nokogiri and sets the document encoding to 'utf-8'.
    def html
      Nokogiri::HTML(body).tap do |document|
        document.encoding = 'utf-8'
      end
    end
  end

  # Class-level additions for the including class.
  module ClassMethods
    attr_accessor :url
  end

  # Instance-level additions for the including class.
  module InstanceMethods

    # Stores the url on the instance and passes it to the class's base_uri.
    def initialize url
      @url = url
      self.class.base_uri(url)
    end

  end

  # Hook run when the module is included; the include order (HTTParty before
  # InstanceMethods) is kept as in the original.
  def self.included(receiver)
    receiver.extend(ClassMethods)
    [HTTParty, InstanceMethods].each do |mixin|
      receiver.send(:include, mixin)
    end
    receiver.send(:parser, HtmlParser)
  end
end
|
data/lib/alparser.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require "alparser/version"
|
2
|
+
|
3
|
+
require "forwardable"
|
4
|
+
require "httparty"
|
5
|
+
require "nokogiri"
|
6
|
+
|
7
|
+
# Entry point of the gem: wraps a parser instance built for the given url and
# forwards the listing queries to it.
class Alparser
  extend Forwardable

  # Components are loaded lazily on first reference.
  autoload :Parser, "alparser/parser"
  autoload :Climb, "alparser/climb"
  autoload :User, "alparser/climb"

  autoload :Branc, "alparser/branc"

  attr_reader :url
  attr_accessor :parser

  # @param url [String] address handed to the parser
  # @param parser [Class] parser class instantiated with the url
  def initialize(url, parser: Alparser::Branc)
    @url = url
    @parser = parser.new(@url)
  end

  def_delegators(:@parser, :club_climbs, :all_club_climbs, :number_of_pages)

end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Specs for the Alparser facade; HTTP traffic is replaced by stubbed requests
# answered from the recorded fixtures via request_from.
describe Alparser do

  context "simple parse" do

    let(:page) { Alparser.new("http://www.kozjak.org") }

    it { expect(page).to respond_to :club_climbs }
    it { expect(page).to respond_to :all_club_climbs }

    context "#club_climbs" do
      before do
        stub_request(:get, /kozjak/)
          .to_return { request_from "kozjak_club_climbs" }
      end

      it { expect(page.club_climbs).to be_kind_of Array }
      it { expect(page.club_climbs[0]).to be_kind_of Alparser::Climb }

      it do
        page.club_climbs
        expect(page.number_of_pages).to eq 5
      end

      context ".first" do
        subject { page.club_climbs.first }

        it do
          expect(subject).to respond_to :date,
            :route, :mountain_range,
            :country, :country_id,
            :grade,
            :user, :user_id, :user_name,
            :conditions, :kind,
            :has_notes, :has_images
        end

        it { expect(subject.user).to be_kind_of Alparser::User }
      end
    end

    context "#all_club_climbs" do
      before do
        # Fixtures are named _p1 .. _p6; the previous 0..5 range stubbed a
        # page 0 with no fixture and never wired up _p6.
        (1..6).each do |number|
          stub_request(:get, /page=#{number}/)
            .to_return { request_from "kozjak_club_climbs_p#{number}" }
        end
      end

      it { expect(page.all_club_climbs).to be_kind_of Array }
      it { expect(page.all_club_climbs.size).to eq 216 }
    end

  end

end
|