alparser 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1e9f1dbc82696f98688236a9087e77df92e20796
4
+ data.tar.gz: fdcc16345076e0414c18af65ce58ea2c953dc48a
5
+ SHA512:
6
+ metadata.gz: 65145f7068c78e2ee7778c23895c536c748f82daa69f3b1779604e9f2f08d44873ec7693f95441a535415ec6f80667e74f56ce981f679edfca0d75bb6d5557a4
7
+ data.tar.gz: aeaca3e514c9fccd1a87dcf49c36e706eb92afe8c19efd02bf72a198b772685148f9196be10f072a34a74a0cd73cd8611b72d329aaf1ec511ef9db957e7d6ae3
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ example.rb
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format doc
2
+ --color
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ alparser
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-2.1.0
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ script: "bundle exec rspec"
2
+
3
+ install: "travis_retry bundle install"
4
+
5
+ rvm:
6
+ - 2.0.0
7
+ - 2.1.0
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,9 @@
1
+ guard :rspec, all_on_start: false, all_after_pass: false, parallel: false do
2
+ watch(%r{^spec/.+_spec\.rb$}) # spec files themselves
3
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
4
+ watch(%r{^lib/alparser/(.+)\.rb$}) { |m| "spec/alparser/#{m[1]}_spec.rb" }
5
+ # NOTE(review): ^lib/(.+)\.rb$ also matches files under lib/alparser/, so those hit both rules above - confirm intended.
6
+ watch('spec/spec_helper.rb') { "spec" } # maps to the whole "spec" path
7
+ watch('lib/alparser.rb') { "spec" } # maps to the whole "spec" path
8
+
9
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Oto Brglez
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,53 @@
1
+ # Alparser
2
+
3
+ [![Gem Version][fury-badge]][fury] [![Build Status][travis-badge]][travis]
4
+
5
+ Alparser is a parser for Slovenian mountaineering pages.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'alparser'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install alparser
20
+
21
+ ## Usage
22
+
23
+ ```ruby
24
+ require "alparser"
25
+
26
+ page = Alparser.new "http://www.kozjak.org", parser: Alparser::Branc
27
+ puts page.club_climbs
28
+ ```
29
+
30
+ ## Supported pages
31
+
32
+ | Page | Parser | Implemented
33
+ | ------------------------------------------- | ------------------- | ------------
34
+ | [APD Kozjak](http://www.kozjak.org/) | Alparser::Branc | Yes
35
+ | [PD TAM](http://www.pdtam.org/) | Alparser::Branc | Yes
36
+ | [AO Celje](http://www.aocelje.org/) | Alparser::Branc | Yes
37
+ | [PD Grmada AO](http://ao.pdgrmada.org/) | Alparser::Branc | Yes
38
+ | [AlpHut](http://alphut.net/) | Alparser::Branc | Yes
39
+ | [AlpHut](http://alphut.net/) | Alparser::Branc | Yes
40
+ | [Plezanje.net](http://plezanje.net/) | Alparser::Plezanje | No
41
+ | [Alpinizem.info](http://www.alpinizem.info/)| Alparser::Alpinizem | No
42
+
43
+ ## Why?
44
+
45
+ This is a highly experimental project. Its purpose is to collect mountaineering data from Slovenia. The project is released under the MIT license; feel free to use it.
46
+
47
+ - [Oto Brglez](http://otobrglez.opalab.com)
48
+
49
+ [fury-badge]: https://badge.fury.io/rb/alparser.png
50
+ [fury]: http://badge.fury.io/rb/alparser
51
+ [travis-badge]: https://secure.travis-ci.org/otobrglez/alparser.png?branch=master
52
+ [travis]: http://travis-ci.org/otobrglez/alparser
53
+
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/alparser.gemspec ADDED
@@ -0,0 +1,31 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'alparser/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "alparser"
8
+ spec.version = Alparser::VERSION # defined in alparser/version, required above
9
+ spec.authors = ["Oto Brglez"]
10
+ spec.email = ["otobrglez@gmail.com"]
11
+ spec.description = %q{Parser for Slovenian mountaineering pages}
12
+ spec.summary = %q{Alparser supports several most common Slovenian mountaineering pages.}
13
+ spec.homepage = "" # NOTE(review): homepage is empty - consider setting the project URL
14
+ spec.license = "MIT"
15
+ # The file list comes from shelling out to `git ls-files`; executables and test files are filtered from that list.
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+ # Runtime dependencies (no version constraints are declared).
21
+ spec.add_dependency "httparty"
22
+ spec.add_dependency "nokogiri"
23
+ # Development-only dependencies.
24
+ spec.add_development_dependency "bundler", "~> 1.3"
25
+ spec.add_development_dependency "rake"
26
+ spec.add_development_dependency "rspec"
27
+ spec.add_development_dependency "webmock"
28
+ spec.add_development_dependency "guard"
29
+ spec.add_development_dependency "guard-rspec"
30
+ spec.add_development_dependency "shoulda-matchers"
31
+ end
@@ -0,0 +1,112 @@
1
+ class Alparser::Branc
2
+ include Alparser::Parser
3
+
4
+ attr_accessor :number_of_pages
5
+
6
+ def club_climbs page:1, leto: nil
7
+
8
+ response = self.class.get "/vzponi-vse.php", query: {
9
+ page: page,
10
+ leto: (leto.nil? ? Time.now.year : year)
11
+ }
12
+
13
+ handle_page_number response
14
+ handle_climbs response
15
+ end
16
+
17
+ def all_club_climbs
18
+ out = club_climbs
19
+ number_of_pages, i = @number_of_pages, 2
20
+
21
+ while i <= number_of_pages
22
+ out = out + club_climbs(page: i)
23
+ i += 1
24
+ end
25
+
26
+ out
27
+ end
28
+
29
+ def handle_climbs response
30
+ response.parsed_response.xpath("//table[@bgcolor='silver']/tr[position()>1]").to_a.map! do |item|
31
+ climb_form item
32
+ end
33
+ end
34
+
35
+ def handle_page_number response
36
+ @number_of_pages ||= 0
37
+ @number_of_pages = ((/^\<span\>.(\d+)/.match(response.parsed_response.xpath("//a[@class='pagebox']/span").to_a.last.to_s))[1].to_i) rescue 1
38
+ end
39
+
40
+ def climb_form item
41
+ date = Date.parse(item.xpath("./td[position()=1]").text)
42
+ route = item.xpath("./td[position()=2]/a").text.strip
43
+ route ||= item.xpath("./td[position()=2]//b").text.strip
44
+
45
+ id = nil
46
+ if item.xpath("./td[position()=2]/a").first.attributes["href"].to_s =~ /pid=(\d+)$/
47
+ id = Integer(Regexp.last_match[1].to_s)
48
+ end
49
+
50
+ if id.nil? and item.xpath("./td[position()=2]/a").first.attributes["href"].to_s =~ /rid=(\d+)$/
51
+ id = Integer(Regexp.last_match[1].to_s)
52
+ end
53
+
54
+ mountain_range = item.xpath("./td[position()=3]").text.strip
55
+ country = item.xpath("./td[position()=4]").text.lstrip.strip.sub!(/\u00A0/, "")
56
+ country ||= item.xpath("./td[position()=4]").text
57
+
58
+ has_notes = item.xpath("./td[position()=2]/img[@src='ikone/opombe.gif']").size == 1
59
+ has_images = item.xpath("./td[position()=2]/img[@src='ikone/fotke.gif']").size == 1
60
+
61
+ country_id = nil
62
+ begin
63
+ if item.xpath("./td[position()=4]/img").first.attributes["src"].to_s =~ /\/(\w+)\.gif$/
64
+ country_id = Regexp.last_match[1].to_s
65
+ end
66
+ rescue
67
+ # Noting
68
+ end
69
+
70
+ grade = item.xpath("./td[position()=5]").text
71
+ climber = item.xpath("./td[position()=6]/a")
72
+
73
+ climber_id = nil
74
+ if climber.first.attributes["href"].to_s =~ /mem=(\d+)/
75
+ climber_id = Regexp.last_match[1].to_i
76
+ end
77
+
78
+ climber = climber.text.strip
79
+
80
+ conditions = item.xpath("./td[position()=7]").text
81
+ kind = item.xpath("./td[position()=8]").text
82
+ kind = kind == "" ? nil : kind
83
+
84
+ climb = Alparser::Climb.new(
85
+ id: id,
86
+ date: date,
87
+ route: route,
88
+ mountain_range: mountain_range,
89
+ country: country,
90
+ country_id: country_id,
91
+ grade: grade,
92
+ conditions: conditions,
93
+ kind: kind,
94
+ has_notes: has_notes,
95
+ has_images: has_images,
96
+ user_name: climber,
97
+ user_id: climber_id
98
+ )
99
+
100
+ climb.base_uri = @url
101
+
102
+ unless climb.user_id.nil?
103
+ climb.user = Alparser::User.new(
104
+ id: climb.user_id,
105
+ name: climb.user_name
106
+ )
107
+ end
108
+
109
+ climb
110
+ end
111
+
112
+ end
@@ -0,0 +1,5 @@
1
+ class Alparser::Climb < OpenStruct # One climb; fields are whatever keys the creator passes. NOTE(review): this file does not require "ostruct" itself.
2
+ end
3
+
4
+ class Alparser::User < OpenStruct # One user; fields are whatever keys the creator passes.
5
+ end
@@ -0,0 +1,31 @@
1
+ module Alparser::Parser # Mixin that gives a parser class HTTParty plus an HTML body parser.
2
+ # Response parser whose :html format returns a Nokogiri document.
3
+ class HtmlParser < HTTParty::Parser
4
+ SupportedFormats.merge!('text/html' => :html)
5
+ # Parses the response body as HTML and sets the document encoding to utf-8.
6
+ def html
7
+ doc = Nokogiri::HTML(body)
8
+ doc.encoding = 'utf-8'
9
+ doc
10
+ end
11
+ end
12
+ # Extended onto the including class (see self.included).
13
+ module ClassMethods
14
+ attr_accessor :url
15
+ end
16
+ # Included into the including class (see self.included).
17
+ module InstanceMethods
18
+ # Stores url in @url and passes it to the class-level base_uri. NOTE(review): presumably shared by all instances of the class - confirm.
19
+ def initialize url
20
+ self.class.base_uri(@url = url)
21
+ end
22
+
23
+ end
24
+ # Include hook: extends ClassMethods, includes HTTParty and InstanceMethods, then registers HtmlParser via `parser`.
25
+ def self.included(receiver)
26
+ receiver.extend ClassMethods
27
+ receiver.send :include, HTTParty
28
+ receiver.send :include, InstanceMethods
29
+ receiver.send :parser, HtmlParser
30
+ end
31
+ end
@@ -0,0 +1,3 @@
1
+ class Alparser
2
+ VERSION = "0.1.1" # gem version; read by alparser.gemspec as Alparser::VERSION
3
+ end
data/lib/alparser.rb ADDED
@@ -0,0 +1,28 @@
1
+ require "alparser/version"
2
+
3
+ require "forwardable"
4
+ require "httparty"
5
+ require "nokogiri"
6
+
7
+ class Alparser # Entry point: pairs a site URL with a parser instance and forwards queries to it.
8
+ extend Forwardable
9
+ # Constants below are autoloaded; User is defined in the same file as Climb.
10
+ autoload :Parser, "alparser/parser"
11
+ autoload :Climb, "alparser/climb"
12
+ autoload :User, "alparser/climb"
13
+ # Site-specific parser implementation.
14
+ autoload :Branc, "alparser/branc"
15
+ # url is the address given to initialize; parser is the parser instance built from it.
16
+ attr_reader :url
17
+ attr_accessor :parser
18
+ # url - site address. parser - parser class (not an instance); it is instantiated with url.
19
+ def initialize url, parser: Alparser::Branc
20
+ @url = url
21
+ @parser = parser.new(@url)
22
+ end
23
+ # These calls are forwarded to the parser instance.
24
+ def_delegators :@parser, \
25
+ :club_climbs, :all_club_climbs, \
26
+ :number_of_pages
27
+
28
+ end
@@ -0,0 +1,56 @@
1
+ require "spec_helper"
2
+
3
+ describe Alparser do
4
+
5
+ context "simple parse" do
6
+ # No parser: argument is given, so the default parser class is used.
7
+ let(:page) { Alparser.new("http://www.kozjak.org") }
8
+
9
+ it { expect(page).to respond_to :club_climbs }
10
+ it { expect(page).to respond_to :all_club_climbs }
11
+ # GET requests matching /kozjak/ are stubbed; request_from is presumably a spec helper defined elsewhere - confirm.
12
+ context "#club_climbs" do
13
+ before {
14
+ stub_request(:get, /kozjak/)
15
+ .to_return { request_from "kozjak_club_climbs" }
16
+ }
17
+
18
+ it { expect(page.club_climbs).to be_kind_of Array }
19
+ it { expect(page.club_climbs[0]).to be_kind_of Alparser::Climb }
20
+ # number_of_pages is only checked after club_climbs has been called.
21
+ it do
22
+ page.club_climbs
23
+ expect(page.number_of_pages).to eq 5
24
+ end
25
+
26
+ context ".first" do
27
+ subject { page.club_climbs.first }
28
+ it do
29
+ expect(subject).to respond_to :date, \
30
+ :route, :mountain_range, \
31
+ :country, :country_id, \
32
+ :grade, \
33
+ :user, :user_id, :user_name, \
34
+ :conditions, :kind, \
35
+ :has_notes, :has_images
36
+ end
37
+
38
+ it { expect(subject.user).to be_kind_of Alparser::User }
39
+ end
40
+ end
41
+ # One stub per page number 0..5, each answered from its own "kozjak_club_climbs_p<N>" name.
42
+ context "#all_club_climbs" do
43
+ before do
44
+ 6.times do |t|
45
+ stub_request(:get, /page=#{t}/)
46
+ .to_return { request_from "kozjak_club_climbs_p#{t}" }
47
+ end
48
+ end
49
+ # 216 is the expected number of climbs across all stubbed pages.
50
+ it { expect(page.all_club_climbs).to be_kind_of Array }
51
+ it { expect(page.all_club_climbs.size).to eq 216 }
52
+ end
53
+
54
+ end
55
+
56
+ end