itunes_crawler 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ script: "bundle exec rspec spec"
3
+ rvm:
4
+ - 1.8.7
5
+ - 1.9.2
6
+ - 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
# Force UTF-8 as the default external/internal encoding on Ruby 1.9.x only.
# The version is checked as a literal "1.9." prefix: the previous pattern
# /1.9/ had an unescaped dot and no anchor, so it also matched unrelated
# versions such as "2.1.9".
if RUBY_VERSION.start_with?('1.9.')
  Encoding.default_external = Encoding::UTF_8
  Encoding.default_internal = Encoding::UTF_8
end

source 'https://rubygems.org'

# Specify your gem's dependencies in itunes_crawler.gemspec
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Barnabás Birmacher
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,69 @@
1
+ ## ITunesCrawler
2
+ [![Build Status](https://travis-ci.org/birmacher/iTunesCrawler.png)](https://travis-ci.org/birmacher/iTunesCrawler)
3
+ [![Code Climate](https://codeclimate.com/github/birmacher/iTunesCrawler.png)](https://codeclimate.com/github/birmacher/iTunesCrawler)
4
+
5
+ ITunesCrawler provides an easy way to download the requested iTunes data through Apple's Search API.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'itunes_crawler'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ ```console
18
+ $ bundle
19
+ ```
20
+
21
+ Or install it yourself as:
22
+
23
+ ```console
24
+ $ gem install itunes_crawler
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ### Download iTunes info for a single app
30
+
31
+ ```ruby
32
+ storefrontFetcher = ITunesCrawler::StorefrontFetcher.new
33
+ storefrontFetcher.on_success { |itunes_item| p itunes_item["trackName"] }
34
+ storefrontFetcher.on_fail { |app_id| p "Nooooooo" }
35
+ storefrontFetcher.fetch( '557137623', 'us' )
36
+ ```
37
+
38
+ ### Download iTunes info for multiple apps
39
+
40
+ ```ruby
41
+ storefrontFetcher = ITunesCrawler::StorefrontFetcher.new
42
+ storefrontFetcher.on_success { |itunes_item| p itunes_item["trackName"] }
43
+ storefrontFetcher.on_fail { |app_id| p "Nooooooo" }
44
+ storefrontFetcher.fetch( ['557137623', '284882215'], 'us' )
45
+ ```
46
+
47
+ ### Change retry count of failed requests
48
+
49
+ ```ruby
50
+ storefrontFetcher = ITunesCrawler::StorefrontFetcher.new( 1 )
51
+ ```
52
+
53
+ or
54
+
55
+ ```ruby
56
+ storefrontFetcher = ITunesCrawler::StorefrontFetcher.new
57
+ storefrontFetcher.retry_count = 1
58
+ ```
59
+
60
+ ## Contributing
61
+
62
+ 1. Fork it
63
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
64
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
65
+ 4. Push to the branch (`git push origin my-new-feature`)
66
+ 5. Create new Pull Request
67
+
68
+ ## License
69
+ MIT license
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,27 @@
1
# coding: utf-8
# Put the gem's own lib/ directory on the load path so the version constant
# can be required before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'itunes_crawler/version'

Gem::Specification.new do |gem|
  # Identity and ownership
  gem.name        = "itunes_crawler"
  gem.version     = ITunesCrawler::VERSION
  gem.authors     = ["Barnabas Birmacher"]
  gem.email       = ["birmacher@gmail.com"]
  gem.homepage    = "https://github.com/birmacher/iTunesCrawler"
  gem.license     = "MIT"
  gem.description = "ITunesCrawler provides an easy way to download the requested iTunes data through Apple's Search API."
  gem.summary     = "ITunesCrawler provides an easy way to download the requested iTunes data through Apple's Search API."

  # Packaged files: everything tracked by git, split on the record separator.
  tracked_files     = `git ls-files`.split($/)
  gem.files         = tracked_files
  gem.executables   = tracked_files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = tracked_files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies
  gem.add_dependency "typhoeus", "~> 0.6.2"
  gem.add_dependency "json", "~> 1.7.7"

  # Development dependencies
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec"
end
@@ -0,0 +1,5 @@
1
+ require "itunes_crawler/version"
2
+ require "itunes_crawler/storefront_fetcher"
3
+
4
# Top-level namespace of the gem. It is intentionally empty here; this file
# only declares the module.
module ITunesCrawler; end
@@ -0,0 +1,20 @@
1
+ require "typhoeus"
2
+ require "itunes_crawler/storefront_fetcher/callback"
3
+ require "itunes_crawler/storefront_fetcher/fetcher"
4
+
5
module ITunesCrawler
  # Entry-point class of the gem. It mixes in the Callback and Fetcher
  # modules and owns the two pieces of state set up by the constructor:
  # the retry limit and the Typhoeus hydra.
  class StorefrontFetcher
    include StorefrontFetcher::Callback
    include StorefrontFetcher::Fetcher

    # Retry limit; readable and writable after construction.
    attr_reader :retry_count
    attr_writer :retry_count

    # retry_count - value stored in @retry_count (defaults to 5).
    #
    # Also creates a fresh Typhoeus::Hydra and keeps it in @hydra.
    def initialize(retry_count = 5)
      @hydra       = Typhoeus::Hydra.new
      @retry_count = retry_count
    end
  end
end
@@ -0,0 +1,18 @@
1
module ITunesCrawler
  class StorefrontFetcher

    # Mixin holding the two user-supplied handler blocks.
    module Callback
      # Completion handler for the success case.
      #
      # When a block is passed it is stored in @success_block, replacing any
      # previously stored one. Returns the currently stored block, or nil if
      # none has been set.
      def on_success(&handler)
        @success_block = handler unless handler.nil?
        @success_block
      end

      # Handler for the failure case.
      #
      # When a block is passed it is stored in @fail_block, replacing any
      # previously stored one. Returns the currently stored block, or nil if
      # none has been set.
      def on_fail(&handler)
        @fail_block = handler unless handler.nil?
        @fail_block
      end
    end

  end
end
@@ -0,0 +1,81 @@
1
+ require "json"
2
+ require "cgi"
3
+ require "itunes_crawler/storefront_fetcher/retryable"
4
+
5
# URI() is used below to split failed lookup URLs; load it explicitly instead
# of relying on another library having required it.
require "uri"

module ITunesCrawler
  class StorefrontFetcher

    # Builds iTunes lookup requests, queues them on @hydra and hands the
    # outcome to the blocks stored in @success_block / @fail_block.
    module Fetcher
      include StorefrontFetcher::Retryable

      # Download iTunes item data.
      #
      # itunes_ids   - a single iTunes id or an (optionally nested) array of ids
      # country_code - value interpolated into the "country" query parameter
      #
      # Ids are looked up in batches of 200 per request. All requests are
      # queued first and @hydra is then run once. Each item of a non-empty
      # "results" array is passed to the on_success block (if one is set);
      # each id of a request that returned no usable results, or that failed
      # and is no longer retryable, is passed to the on_fail block (if set).
      def fetch(itunes_ids, country_code)
        ids = [itunes_ids].flatten

        # Slice the ids into batches of 200 and queue one request per batch
        ids.each_slice(200) do |chunk|
          add_url_to_queue "https://itunes.apple.com/lookup?id=#{chunk.join(",")}&country=#{country_code}", false
        end
        @hydra.run
      end

      private

      # Creates the request for +url+, wires up its callbacks and queues it on
      # @hydra. Runs @hydra immediately when +start+ is true.
      def add_url_to_queue(url, start=true)
        request = Typhoeus::Request.new(url, :followlocation => true)

        request.on_success do |response|
          itunes_data = parse_results(response.body)

          if itunes_data.empty?
            # A response without results counts as a failed lookup, whether
            # or not a success block was registered.
            get_failed_itunes_ids(response.request.base_url)
          elsif @success_block
            # No `return` here: this block runs after add_url_to_queue has
            # already returned, where `return` would raise LocalJumpError.
            itunes_data.each do |itunes_item|
              @success_block.call itunes_item
            end
          end
        end

        request.on_failure do |response|
          failed_url = response.request.base_url

          if retryable?(failed_url)
            add_url_to_queue(failed_url)
          else
            get_failed_itunes_ids(failed_url)
          end
        end

        # Add to queue and start if requested
        @hydra.queue request
        @hydra.run if start
      end

      # Extracts the "results" array from a JSON response body. Returns an
      # empty array when the body is not valid JSON or has no results array,
      # so the caller can treat all of those cases as a failed lookup.
      def parse_results(body)
        results = JSON.parse(body)["results"]
        results.is_a?(Array) ? results : []
      rescue JSON::ParserError, TypeError
        []
      end
    end

    private

    # Returns the ids listed in the "id" query parameter of +url+ as an array
    # of strings (empty when the parameter is missing or blank).
    def get_failed_itunes_ids_from_url(url)
      CGI.parse(URI(url).query.to_s)["id"].first.to_s.split(",")
    end

    # Calls @fail_block once for every id contained in +url+. Does nothing
    # when no fail block has been registered.
    def get_failed_itunes_ids(url)
      return unless @fail_block

      get_failed_itunes_ids_from_url(url).each do |itunes_id|
        @fail_block.call(itunes_id)
      end
    end

  end
end
@@ -0,0 +1,29 @@
1
module ITunesCrawler
  class StorefrontFetcher

    # Per-URL retry bookkeeping. Expects the including object to provide
    # @retry_count; keeps its own state in @failed_urls.
    module Retryable
      # Decides whether a failed +url+ may be requested again and records the
      # retry when it is granted.
      #
      # Returns true exactly @retry_count times for any given url and false
      # afterwards; always false when @retry_count is zero or negative.
      # (Previously the comparison was off by one: a retry_count of 1 and of 2
      # both allowed a single retry, and 5 allowed only four.)
      def retryable?(url)
        return false if @retry_count <= 0

        # url => number of retries already granted for it
        @failed_urls ||= {}
        granted = @failed_urls[url] || 0

        # No more tries for this url
        return false if granted >= @retry_count

        @failed_urls[url] = granted + 1
        true
      end
    end

  end
end
@@ -0,0 +1,3 @@
1
module ITunesCrawler
  # Gem version string. Frozen so the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'itunes_crawler'
4
+
5
+ RSpec.configure do |config|
6
+
7
+ end
@@ -0,0 +1,45 @@
1
+ require 'spec_helper'
2
+
3
# NOTE: these examples request https://itunes.apple.com/lookup through the
# fetcher, so they need network access.
describe ITunesCrawler::StorefrontFetcher do
  # Runs one fetch and returns [items, failed_ids]: everything passed to the
  # on_success block and everything passed to the on_fail block. Collecting
  # the values and asserting after the fetch keeps an example from passing
  # vacuously when no callback is invoked at all.
  def fetch_results(itunes_ids, country_code = 'us')
    items      = []
    failed_ids = []

    storefrontFetcher = ITunesCrawler::StorefrontFetcher.new
    storefrontFetcher.on_success { |itunes_item| items << itunes_item }
    storefrontFetcher.on_fail { |itunes_id| failed_ids << itunes_id }
    storefrontFetcher.fetch(itunes_ids, country_code)

    [items, failed_ids]
  end

  describe "it should download data from iTunes for" do
    it "one iTunes id" do
      items, failed_ids = fetch_results('557137623')

      failed_ids.should be_empty
      items.should_not be_empty
      items.each { |itunes_item| itunes_item.should be_a_kind_of(Hash) }
    end

    it "multiple itunes ids" do
      items, failed_ids = fetch_results(['557137623', '284882215'])

      failed_ids.should be_empty
      items.should_not be_empty
      items.each { |itunes_item| itunes_item.should be_a_kind_of(Hash) }
    end
  end

  describe "response should include readable values" do
    it do
      items, failed_ids = fetch_results('557137623')

      failed_ids.should be_empty
      items.should_not be_empty
      items.each { |itunes_item| itunes_item["kind"].should eql "software" }
    end
  end

  describe "it should fail while trying to download data for invalid iTunes ID" do
    it do
      items, failed_ids = fetch_results("nope")

      items.should be_empty
      failed_ids.should == ["nope"]
    end
  end
end
metadata ADDED
@@ -0,0 +1,147 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: itunes_crawler
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Barnabas Birmacher
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-03-28 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: typhoeus
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.6.2
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.6.2
30
+ - !ruby/object:Gem::Dependency
31
+ name: json
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 1.7.7
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.7.7
46
+ - !ruby/object:Gem::Dependency
47
+ name: bundler
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '1.3'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.3'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rake
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rspec
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description: ITunesCrawler provides an easy way to download the requested iTunes data
95
+ through Apple's Search API.
96
+ email:
97
+ - birmacher@gmail.com
98
+ executables: []
99
+ extensions: []
100
+ extra_rdoc_files: []
101
+ files:
102
+ - .gitignore
103
+ - .rspec
104
+ - .travis.yml
105
+ - Gemfile
106
+ - LICENSE.txt
107
+ - README.md
108
+ - Rakefile
109
+ - itunes_crawler.gemspec
110
+ - lib/itunes_crawler.rb
111
+ - lib/itunes_crawler/storefront_fetcher.rb
112
+ - lib/itunes_crawler/storefront_fetcher/callback.rb
113
+ - lib/itunes_crawler/storefront_fetcher/fetcher.rb
114
+ - lib/itunes_crawler/storefront_fetcher/retryable.rb
115
+ - lib/itunes_crawler/version.rb
116
+ - spec/spec_helper.rb
117
+ - spec/storefront_fetcher_spec.rb
118
+ homepage: https://github.com/birmacher/iTunesCrawler
119
+ licenses:
120
+ - MIT
121
+ post_install_message:
122
+ rdoc_options: []
123
+ require_paths:
124
+ - lib
125
+ required_ruby_version: !ruby/object:Gem::Requirement
126
+ none: false
127
+ requirements:
128
+ - - ! '>='
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ required_rubygems_version: !ruby/object:Gem::Requirement
132
+ none: false
133
+ requirements:
134
+ - - ! '>='
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ requirements: []
138
+ rubyforge_project:
139
+ rubygems_version: 1.8.24
140
+ signing_key:
141
+ specification_version: 3
142
+ summary: ITunesCrawler provides an easy way to download the requested iTunes data
143
+ through Apple's Search API.
144
+ test_files:
145
+ - spec/spec_helper.rb
146
+ - spec/storefront_fetcher_spec.rb
147
+ has_rdoc: