alparser 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1e9f1dbc82696f98688236a9087e77df92e20796
4
+ data.tar.gz: fdcc16345076e0414c18af65ce58ea2c953dc48a
5
+ SHA512:
6
+ metadata.gz: 65145f7068c78e2ee7778c23895c536c748f82daa69f3b1779604e9f2f08d44873ec7693f95441a535415ec6f80667e74f56ce981f679edfca0d75bb6d5557a4
7
+ data.tar.gz: aeaca3e514c9fccd1a87dcf49c36e706eb92afe8c19efd02bf72a198b772685148f9196be10f072a34a74a0cd73cd8611b72d329aaf1ec511ef9db957e7d6ae3
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ example.rb
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format doc
2
+ --color
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ alparser
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-2.1.0
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ script: "bundle exec rspec"
2
+
3
+ install: "travis_retry bundle install"
4
+
5
+ rvm:
6
+ - 2.0.0
7
+ - 2.1.0
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,9 @@
1
# Guard configuration: re-runs RSpec files in response to file changes.
guard :rspec,
      all_on_start: false,
      all_after_pass: false,
      parallel: false do
  # A changed spec file is passed to RSpec as-is.
  watch(%r{^spec/.+_spec\.rb$})

  # A changed file under lib/ maps to a spec path derived from its name.
  watch(%r{^lib/(.+)\.rb$}) do |match|
    "spec/lib/#{match[1]}_spec.rb"
  end
  watch(%r{^lib/alparser/(.+)\.rb$}) do |match|
    "spec/alparser/#{match[1]}_spec.rb"
  end

  # Changes to the spec helper or the gem entry point run the whole suite.
  watch('spec/spec_helper.rb') { "spec" }
  watch('lib/alparser.rb') { "spec" }
end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Oto Brglez
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,53 @@
1
+ # Alparser
2
+
3
+ [![Gem Version][fury-badge]][fury] [![Build Status][travis-badge]][travis]
4
+
5
+ Alparser is a parser for Slovenian mountaineering pages.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'alparser'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install alparser
20
+
21
+ ## Usage
22
+
23
+ ```ruby
24
+ require "alparser"
25
+
26
+ page = Alparser.new "http://www.kozjak.org", parser: Alparser::Branc
27
+ puts page.club_climbs
28
+ ```
29
+
30
+ ## Supported pages
31
+
32
+ | Page | Parser | Implemented
33
+ | ------------------------------------------- | ------------------- | ------------
34
+ | [APD Kozjak](http://www.kozjak.org/) | Alparser::Branc | Yes
35
+ | [PD TAM](http://www.pdtam.org/) | Alparser::Branc | Yes
36
+ | [AO Celje](http://www.aocelje.org/) | Alparser::Branc | Yes
37
+ | [PD Grmada AO](http://ao.pdgrmada.org/) | Alparser::Branc | Yes
38
+ | [AlpHut](http://alphut.net/) | Alparser::Branc | Yes
39
+ | [AlpHut](http://alphut.net/) | Alparser::Branc | Yes
40
+ | [Plezanje.net](http://plezanje.net/) | Alparser::Plezanje | No
41
+ | [Alpinizem.info](http://www.alpinizem.info/)| Alparser::Alpinizem | No
42
+
43
+ ## Why?
44
+
45
+ This is a highly experimental project. Its purpose is to collect mountaineering data from Slovenia. The project is released under the MIT license; feel free to use it.
46
+
47
+ - [Oto Brglez](http://otobrglez.opalab.com)
48
+
49
+ [fury-badge]: https://badge.fury.io/rb/alparser.png
50
+ [fury]: http://badge.fury.io/rb/alparser
51
+ [travis-badge]: https://secure.travis-ci.org/otobrglez/alparser.png?branch=master
52
+ [travis]: http://travis-ci.org/otobrglez/alparser
53
+
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/alparser.gemspec ADDED
@@ -0,0 +1,31 @@
1
# coding: utf-8
# Gem specification for alparser.

# Put ./lib on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'alparser/version'

Gem::Specification.new do |spec|
  spec.name          = "alparser"
  spec.version       = Alparser::VERSION
  spec.authors       = ["Oto Brglez"]
  spec.email         = ["otobrglez@gmail.com"]
  spec.description   = %q{Parser for Slovenian mountaineering pages}
  spec.summary       = %q{Alparser supports several most common Slovenian mountaineering pages.}
  # Was an empty string. NOTE(review): URL inferred from the
  # "otobrglez/alparser" Travis slug in the README -- confirm.
  spec.homepage      = "https://github.com/otobrglez/alparser"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # picked out of that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_dependency "httparty"
  spec.add_dependency "nokogiri"

  # Development and test tooling.
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "webmock"
  spec.add_development_dependency "guard"
  spec.add_development_dependency "guard-rspec"
  spec.add_development_dependency "shoulda-matchers"
end
@@ -0,0 +1,112 @@
1
# Parser for club sites that publish their climb log at "/vzponi-vse.php".
#
# The HTTP client methods (+get+, +base_uri+) and the constructor come from
# the Alparser::Parser mixin.
class Alparser::Branc
  include Alparser::Parser

  # Page count extracted from the pager of the most recently fetched listing.
  attr_accessor :number_of_pages

  # Fetches one page of the club's climb listing.
  #
  # page - value sent as the "page" query parameter (default: 1).
  # leto - value sent as the "leto" query parameter; the current calendar
  #        year is used when nil.
  #
  # Returns an Array of Alparser::Climb and updates #number_of_pages.
  def club_climbs(page: 1, leto: nil)
    # The fallback branch used to reference an undefined local `year`, so
    # every call with an explicit `leto:` raised NameError.
    response = self.class.get "/vzponi-vse.php", query: {
      page: page,
      leto: leto.nil? ? Time.now.year : leto
    }

    handle_page_number response
    handle_climbs response
  end

  # Fetches the first page and then every following page up to the page
  # count reported by the first response.
  #
  # Returns a single Array with the climbs of all pages, in page order.
  def all_club_climbs
    climbs = club_climbs
    # Snapshot the count now: each later request reassigns @number_of_pages.
    last_page = @number_of_pages

    (2..last_page).each { |page| climbs += club_climbs(page: page) }

    climbs
  end

  # Converts every row after the first of the silver-background table into
  # a climb.
  #
  # Returns an Array of Alparser::Climb.
  def handle_climbs(response)
    rows = response.parsed_response.xpath("//table[@bgcolor='silver']/tr[position()>1]")
    rows.map { |row| climb_form(row) }
  end

  # Reads the page count from the last "pagebox" link of the response and
  # stores it in @number_of_pages.
  #
  # Best effort, as before: when no pager is found, or the response cannot
  # be queried at all, the count is set to 1.
  def handle_page_number(response)
    last_box = response.parsed_response.xpath("//a[@class='pagebox']/span").to_a.last
    match = /^\<span\>.(\d+)/.match(last_box.to_s)
    @number_of_pages = match ? match[1].to_i : 1
  rescue StandardError
    @number_of_pages = 1
  end

  # Builds an Alparser::Climb from one table row.
  #
  # item - the <tr> node; columns are read by position (1 = date,
  #        2 = route, 3 = mountain range, 4 = country, 5 = grade,
  #        6 = climber, 7 = conditions, 8 = kind).
  #
  # Returns the Alparser::Climb; its +user+ is set only when a climber id
  # could be extracted.
  # Raises whatever Date.parse raises when the first cell is not a date.
  def climb_form(item)
    date = Date.parse(item.xpath("./td[position()=1]").text)

    route_links = item.xpath("./td[position()=2]/a")
    route = route_links.text.strip
    # `route ||= ...` could never fire because String#strip always returns
    # a String; fall back to the bold text when the link text is empty.
    route = item.xpath("./td[position()=2]//b").text.strip if route.empty?

    # A "pid" id is preferred over a "rid" id. Rows without a route link
    # yield a nil id instead of raising NoMethodError on nil.
    route_href = first_attribute(route_links, "href")
    id_match = /pid=(\d+)$/.match(route_href) || /rid=(\d+)$/.match(route_href)
    # to_i instead of Integer(): Integer("010") would be read as octal.
    id = id_match && id_match[1].to_i

    mountain_range = item.xpath("./td[position()=3]").text.strip

    # `sub!` returns nil when nothing was replaced, which used to make the
    # country fall back to the raw, unstripped cell text.
    country = item.xpath("./td[position()=4]").text.strip.sub(/\u00A0/, "")

    has_notes = item.xpath("./td[position()=2]/img[@src='ikone/opombe.gif']").size == 1
    has_images = item.xpath("./td[position()=2]/img[@src='ikone/fotke.gif']").size == 1

    # Country code is the base name of the image in the country cell, if any.
    flag_src = first_attribute(item.xpath("./td[position()=4]/img"), "src")
    country_id = flag_src[%r{/(\w+)\.gif$}, 1]

    grade = item.xpath("./td[position()=5]").text

    climber_links = item.xpath("./td[position()=6]/a")
    climber_match = /mem=(\d+)/.match(first_attribute(climber_links, "href"))
    climber_id = climber_match && climber_match[1].to_i
    climber = climber_links.text.strip

    conditions = item.xpath("./td[position()=7]").text
    kind = item.xpath("./td[position()=8]").text
    kind = nil if kind.empty?

    climb = Alparser::Climb.new(
      id: id,
      date: date,
      route: route,
      mountain_range: mountain_range,
      country: country,
      country_id: country_id,
      grade: grade,
      conditions: conditions,
      kind: kind,
      has_notes: has_notes,
      has_images: has_images,
      user_name: climber,
      user_id: climber_id
    )

    climb.base_uri = @url

    unless climb.user_id.nil?
      climb.user = Alparser::User.new(
        id: climb.user_id,
        name: climb.user_name
      )
    end

    climb
  end

  private

  # Returns the value of attribute +name+ on the first node of +nodes+ as a
  # String, or "" when the set is empty or the attribute is missing.
  def first_attribute(nodes, name)
    node = nodes.first
    node ? node.attributes[name].to_s : ""
  end
end
@@ -0,0 +1,5 @@
1
# OpenStruct lives in the "ostruct" standard library; require it here
# rather than relying on another library having loaded it first.
require "ostruct"

# A single climb. An open attribute bag: it accepts whatever attributes
# it is constructed or assigned with.
class Alparser::Climb < OpenStruct
end

# A climber. An open attribute bag, like Alparser::Climb.
class Alparser::User < OpenStruct
end
@@ -0,0 +1,31 @@
1
# Mixin for parser classes. Including it pulls HTTParty into the receiver,
# installs HtmlParser as its response parser and provides a constructor
# that takes the site URL.
module Alparser::Parser

  # HTTParty parser that registers the "text/html" content type under the
  # :html format and returns such bodies as Nokogiri documents.
  class HtmlParser < HTTParty::Parser
    SupportedFormats.merge!('text/html' => :html)

    # Parses the response body as HTML and marks the document as UTF-8.
    def html
      Nokogiri::HTML(body).tap { |document| document.encoding = 'utf-8' }
    end
  end

  # Class-level additions for the including class.
  module ClassMethods
    attr_accessor :url
  end

  # Instance-level additions for the including class.
  module InstanceMethods

    # Remembers +url+ on the instance and sets it as the base URI of the
    # including class.
    def initialize(url)
      @url = url
      self.class.base_uri(url)
    end

  end

  # Hook run on `include Alparser::Parser`. HTTParty is included before
  # `parser` is called on the receiver.
  def self.included(receiver)
    receiver.extend(ClassMethods)
    receiver.send(:include, HTTParty)
    receiver.send(:include, InstanceMethods)
    receiver.send(:parser, HtmlParser)
  end
end
@@ -0,0 +1,3 @@
1
class Alparser
  # Version of the gem; read by the gemspec.
  VERSION = "0.1.1"
end
data/lib/alparser.rb ADDED
@@ -0,0 +1,28 @@
1
+ require "alparser/version"
2
+
3
+ require "forwardable"
4
+ require "httparty"
5
+ require "nokogiri"
6
+
7
# Entry point of the gem: wraps one site URL together with the parser
# instance that reads it, and forwards the query methods to that parser.
class Alparser
  extend Forwardable

  # Constants are loaded on first reference.
  autoload :Parser, "alparser/parser"
  autoload :Climb,  "alparser/climb"
  autoload :User,   "alparser/climb"

  autoload :Branc,  "alparser/branc"

  attr_reader :url
  attr_accessor :parser

  # url    - address handed to the parser's constructor.
  # parser - parser class to instantiate (default: Alparser::Branc).
  def initialize(url, parser: Alparser::Branc)
    @url = url
    @parser = parser.new(@url)
  end

  # These calls are answered by the parser instance.
  def_delegators :@parser, *%i[club_climbs all_club_climbs number_of_pages]

end
@@ -0,0 +1,56 @@
1
require "spec_helper"

# Exercises Alparser with its default parser against stubbed HTTP
# responses; `request_from` is not defined in this file.
describe Alparser do

  context "simple parse" do

    let(:page) { Alparser.new("http://www.kozjak.org") }

    it { expect(page).to respond_to(:club_climbs) }
    it { expect(page).to respond_to(:all_club_climbs) }

    context "#club_climbs" do
      # Every GET whose URL contains "kozjak" is answered from one fixture.
      before do
        stub_request(:get, /kozjak/)
          .to_return { request_from "kozjak_club_climbs" }
      end

      it { expect(page.club_climbs).to be_kind_of(Array) }
      it { expect(page.club_climbs[0]).to be_kind_of(Alparser::Climb) }

      it do
        page.club_climbs
        expect(page.number_of_pages).to eq 5
      end

      context ".first" do
        subject { page.club_climbs.first }

        it do
          expect(subject).to respond_to(
            :date,
            :route, :mountain_range,
            :country, :country_id,
            :grade,
            :user, :user_id, :user_name,
            :conditions, :kind,
            :has_notes, :has_images
          )
        end

        it { expect(subject.user).to be_kind_of(Alparser::User) }
      end
    end

    context "#all_club_climbs" do
      # One stub per page number 0..5, each answered from its own fixture.
      before do
        6.times do |page_number|
          stub_request(:get, /page=#{page_number}/)
            .to_return { request_from "kozjak_club_climbs_p#{page_number}" }
        end
      end

      it { expect(page.all_club_climbs).to be_kind_of(Array) }
      it { expect(page.all_club_climbs.size).to eq 216 }
    end

  end

end