itunes_crawler 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ script: "bundle exec rspec spec"
3
+ rvm:
4
+ - 1.8.7
5
+ - 1.9.2
6
+ - 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
+ # Ruby 1.9.x only: force UTF-8 as the default external/internal encoding.
2
+ # The match is anchored so versions that merely contain "1.9" (e.g. 2.1.9) are skipped.
3
+ if RUBY_VERSION =~ /\A1\.9\./
4
+ Encoding.default_external = Encoding::UTF_8
5
+ Encoding.default_internal = Encoding::UTF_8
6
+ end
7
+
8
+ source 'https://rubygems.org'
9
+
10
+ # Specify your gem's dependencies in itunes_crawler.gemspec
11
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Barnabás Birmacher
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,69 @@
1
+ ## ITunesCrawler
2
+ [![Build Status](https://travis-ci.org/birmacher/iTunesCrawler.png)](https://travis-ci.org/birmacher/iTunesCrawler)
3
+ [![Code Climate](https://codeclimate.com/github/birmacher/iTunesCrawler.png)](https://codeclimate.com/github/birmacher/iTunesCrawler)
4
+
5
+ ITunesCrawler provides an easy way to download the requested iTunes data through Apple's Search API.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'itunes_crawler'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ ```console
18
+ $ bundle
19
+ ```
20
+
21
+ Or install it yourself as:
22
+
23
+ ```console
24
+ $ gem install itunes_crawler
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ### Download iTunes info for a single app
30
+
31
+ ```ruby
32
+ storefrontFetcher = ITunesCrawler::StorefrontFetcher.new
33
+ storefrontFetcher.on_success { |itunes_item| p itunes_item["trackName"] }
34
+ storefrontFetcher.on_fail { |app_id| p "Nooooooo" }
35
+ storefrontFetcher.fetch( '557137623', 'us' )
36
+ ```
37
+
38
+ ### Download iTunes info for multiple apps
39
+
40
+ ```ruby
41
+ storefrontFetcher = ITunesCrawler::StorefrontFetcher.new
42
+ storefrontFetcher.on_success { |itunes_item| p itunes_item["trackName"] }
43
+ storefrontFetcher.on_fail { |app_id| p "Nooooooo" }
44
+ storefrontFetcher.fetch( ['557137623', '284882215'], 'us' )
45
+ ```
46
+
47
+ ### Change retry count of failed requests
48
+
49
+ ```ruby
50
+ storefrontFetcher = ITunesCrawler::StorefrontFetcher.new( 1 )
51
+ ```
52
+
53
+ or
54
+
55
+ ```ruby
56
+ storefrontFetcher = ITunesCrawler::StorefrontFetcher.new
57
+ storefrontFetcher.retry_count = 1
58
+ ```
59
+
60
+ ## Contributing
61
+
62
+ 1. Fork it
63
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
64
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
65
+ 4. Push to the branch (`git push origin my-new-feature`)
66
+ 5. Create a new Pull Request
67
+
68
+ ## License
69
+ MIT license
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'itunes_crawler/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "itunes_crawler"
8
+ spec.version = ITunesCrawler::VERSION
9
+ spec.authors = ["Barnabas Birmacher"]
10
+ spec.email = ["birmacher@gmail.com"]
11
+ spec.description = "ITunesCrawler provides an easy way to download the requested iTunes data through Apple's Search API."
12
+ spec.summary = "ITunesCrawler provides an easy way to download the requested iTunes data through Apple's Search API."
13
+ spec.homepage = "https://github.com/birmacher/iTunesCrawler"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "typhoeus", "~> 0.6.2"
22
+ spec.add_dependency "json", "~> 1.7.7"
23
+
24
+ spec.add_development_dependency "bundler", "~> 1.3"
25
+ spec.add_development_dependency "rake"
26
+ spec.add_development_dependency "rspec"
27
+ end
@@ -0,0 +1,5 @@
1
+ require "itunes_crawler/version"
2
+ require "itunes_crawler/storefront_fetcher"
3
+
4
+ module ITunesCrawler
5
+ end
@@ -0,0 +1,25 @@
1
+ require "typhoeus"
2
+ require "itunes_crawler/storefront_fetcher/callback"
3
+ require "itunes_crawler/storefront_fetcher/fetcher"
4
+
5
+ module ITunesCrawler
6
+
7
+ # Entry point of the gem: mixes in the callback registration (Callback) and
8
+ # the request logic (Fetcher) and owns the state both of them rely on.
9
+ class StorefrontFetcher
10
+ include StorefrontFetcher::Callback
11
+ include StorefrontFetcher::Fetcher
12
+
13
+ # Retry limit consulted by Retryable#retryable? for failed requests
14
+ attr_accessor :retry_count
15
+
16
+ # retry_count - retry limit for failed requests (default: 5)
17
+ def initialize( retry_count=5 )
18
+ @retry_count = retry_count
19
+ # Hydra instance that Fetcher queues its requests on and runs
20
+ @hydra = Typhoeus::Hydra.new
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,22 @@
1
+ module ITunesCrawler
2
+ class StorefrontFetcher
3
+
4
+ # Registration of the success / failure callbacks.
5
+ module Callback
6
+ # Stores the given block in @success_block (only when a block is passed)
7
+ # and returns the currently stored success block.
8
+ def on_success(&block)
9
+ @success_block = block if block_given?
10
+ @success_block
11
+ end
12
+
13
+ # Stores the given block in @fail_block (only when a block is passed)
14
+ # and returns the currently stored failure block.
15
+ def on_fail(&block)
16
+ @fail_block = block if block_given?
17
+ @fail_block
18
+ end
19
+ end
20
+
21
+ end
22
+ end
@@ -0,0 +1,98 @@
1
+ require "json"
2
+ require "cgi"
3
+ require "uri"
4
+ require "itunes_crawler/storefront_fetcher/retryable"
5
+
6
+ module ITunesCrawler
7
+ class StorefrontFetcher
8
+
9
+ # Builds iTunes lookup requests, runs them on @hydra and hands the outcome
10
+ # to the blocks stored in @success_block / @fail_block.
11
+ module Fetcher
12
+ include StorefrontFetcher::Retryable
13
+
14
+ # Download iTunes item data.
15
+ #
16
+ # itunes_ids   - a single iTunes ID or an array of IDs
17
+ # country_code - value sent as the "country" parameter of the lookup
18
+ #
19
+ # Each returned item is passed to the success block; each ID of a request
20
+ # that failed for good or returned no results is passed to the fail block.
21
+ def fetch(itunes_ids, country_code)
22
+ # Accept a single ID as well as (nested) arrays of IDs
23
+ queue = [ itunes_ids ].flatten
24
+ country = CGI.escape( country_code.to_s )
25
+
26
+ # One lookup request per slice of at most 200 IDs
27
+ queue.each_slice( 200 ) do |chunk|
28
+ ids = chunk.map { |itunes_id| CGI.escape( itunes_id.to_s ) }.join( "," )
29
+ add_url_to_queue "https://itunes.apple.com/lookup?id=#{ids}&country=#{country}", false
30
+ end
31
+ @hydra.run
32
+ end
33
+
34
+ private
35
+
36
+ # Queues a request for url; also runs the hydra when start is true.
37
+ def add_url_to_queue(url, start=true)
38
+ request = Typhoeus::Request.new( url, :followlocation => true )
39
+
40
+ # Callbacks
41
+ # No `return` in here: with start == false these blocks are called from
42
+ # fetch's @hydra.run after this method has returned, where `return` would
43
+ # raise a LocalJumpError.
44
+ request.on_success do |response|
45
+ itunes_data = parse_itunes_results( response.body )
46
+
47
+ # A response without (parsable) results also counts as a failed request
48
+ if !itunes_data || itunes_data.empty?
49
+ get_failed_itunes_ids( response.request.base_url )
50
+ elsif @success_block
51
+ itunes_data.each do |itunes_item|
52
+ @success_block.call itunes_item
53
+ end
54
+ end
55
+ end
56
+
57
+ request.on_failure do |response|
58
+ failed_url = response.request.base_url
59
+
60
+ if retryable?( failed_url )
61
+ add_url_to_queue( failed_url )
62
+ else
63
+ get_failed_itunes_ids( failed_url )
64
+ end
65
+ end
66
+
67
+ # Add to queue and start if requested
68
+ @hydra.queue request
69
+ @hydra.run if start
70
+ end
71
+
72
+ # Returns the "results" entry of a lookup response body, or nil when the
73
+ # body is not the expected JSON document.
74
+ def parse_itunes_results(body)
75
+ JSON.parse( body )["results"]
76
+ rescue JSON::ParserError, TypeError
77
+ nil
78
+ end
79
+ end
80
+
81
+ private
82
+
83
+ # Extracts the requested iTunes IDs from the "id" parameter of a lookup URL.
84
+ def get_failed_itunes_ids_from_url(url)
85
+ CGI.parse( URI( url ).query.to_s )["id"].first.to_s.split( "," )
86
+ end
87
+
88
+ # Reports every iTunes ID requested by url to the fail block (if one is set).
89
+ def get_failed_itunes_ids( url )
90
+ return unless @fail_block
91
+
92
+ get_failed_itunes_ids_from_url( url ).each do |itunes_id|
93
+ @fail_block.call( itunes_id )
94
+ end
95
+ end
96
+
97
+ end
98
+ end
@@ -0,0 +1,24 @@
1
+ module ITunesCrawler
2
+ class StorefrontFetcher
3
+
4
+ # Per-URL retry bookkeeping for failed requests.
5
+ module Retryable
6
+ # Registers one more failure of url and tells whether the request may be
7
+ # sent again. Every URL gets exactly @retry_count retries; the retries
8
+ # granted so far are kept in @failed_urls, keyed by URL.
9
+ #
10
+ # Returns true when a retry is allowed (and counts it), false otherwise.
11
+ def retryable?(url)
12
+ @failed_urls ||= {}
13
+ retries = @failed_urls[url] || 0
14
+
15
+ # Budget used up - always the case when retry_count is zero or negative
16
+ return false if retries >= @retry_count
17
+
18
+ @failed_urls[url] = retries + 1
19
+ true
20
+ end
21
+ end
22
+
23
+ end
24
+ end
@@ -0,0 +1,3 @@
1
+ module ITunesCrawler
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'itunes_crawler'
4
+
5
+ RSpec.configure do |config|
6
+
7
+ end
@@ -0,0 +1,45 @@
1
+ require 'spec_helper'
2
+
3
+ describe ITunesCrawler::StorefrontFetcher do
4
+ describe "it should download data from iTunes for" do
5
+ it "one iTunes id" do
6
+ storefrontFetcher = ITunesCrawler::StorefrontFetcher.new
7
+ storefrontFetcher.on_success do |itunes_item|
8
+ itunes_item.should be_a_kind_of( Hash )
9
+ end
10
+ storefrontFetcher.on_fail { |itunes_id| fail }
11
+ storefrontFetcher.fetch( '557137623', 'us' )
12
+ end
13
+
14
+ it "multiple itunes ids" do
15
+ storefrontFetcher = ITunesCrawler::StorefrontFetcher.new
16
+ storefrontFetcher.on_success do |itunes_item|
17
+ itunes_item.should be_a_kind_of( Hash )
18
+ end
19
+ storefrontFetcher.on_fail { |itunes_id| fail }
20
+ storefrontFetcher.fetch( ['557137623', '284882215'], 'us' )
21
+ end
22
+ end
23
+
24
+ describe "response should include readable values" do
25
+ it do
26
+ storefrontFetcher = ITunesCrawler::StorefrontFetcher.new
27
+ storefrontFetcher.on_success do |itunes_item|
28
+ itunes_item["kind"].should eql "software"
29
+ end
30
+ storefrontFetcher.on_fail { |itunes_id| fail }
31
+ storefrontFetcher.fetch( '557137623', 'us' )
32
+ end
33
+ end
34
+
35
+ describe "it should fail while trying to download data for invalid iTunes ID" do
36
+ it do
37
+ storefrontFetcher = ITunesCrawler::StorefrontFetcher.new
38
+ storefrontFetcher.on_success { |itunes_id| fail }
39
+ storefrontFetcher.on_fail do |itunes_id|
40
+ itunes_id.should eql "nope"
41
+ end
42
+ storefrontFetcher.fetch( "nope", 'us' )
43
+ end
44
+ end
45
+ end
metadata ADDED
@@ -0,0 +1,147 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: itunes_crawler
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Barnabas Birmacher
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-03-28 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: typhoeus
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.6.2
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.6.2
30
+ - !ruby/object:Gem::Dependency
31
+ name: json
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 1.7.7
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.7.7
46
+ - !ruby/object:Gem::Dependency
47
+ name: bundler
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '1.3'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.3'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rake
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rspec
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description: ITunesCrawler provides an easy way to download the requested iTunes data
95
+ through Apple's Search API.
96
+ email:
97
+ - birmacher@gmail.com
98
+ executables: []
99
+ extensions: []
100
+ extra_rdoc_files: []
101
+ files:
102
+ - .gitignore
103
+ - .rspec
104
+ - .travis.yml
105
+ - Gemfile
106
+ - LICENSE.txt
107
+ - README.md
108
+ - Rakefile
109
+ - itunes_crawler.gemspec
110
+ - lib/itunes_crawler.rb
111
+ - lib/itunes_crawler/storefront_fetcher.rb
112
+ - lib/itunes_crawler/storefront_fetcher/callback.rb
113
+ - lib/itunes_crawler/storefront_fetcher/fetcher.rb
114
+ - lib/itunes_crawler/storefront_fetcher/retryable.rb
115
+ - lib/itunes_crawler/version.rb
116
+ - spec/spec_helper.rb
117
+ - spec/storefront_fetcher_spec.rb
118
+ homepage: https://github.com/birmacher/iTunesCrawler
119
+ licenses:
120
+ - MIT
121
+ post_install_message:
122
+ rdoc_options: []
123
+ require_paths:
124
+ - lib
125
+ required_ruby_version: !ruby/object:Gem::Requirement
126
+ none: false
127
+ requirements:
128
+ - - ! '>='
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ required_rubygems_version: !ruby/object:Gem::Requirement
132
+ none: false
133
+ requirements:
134
+ - - ! '>='
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ requirements: []
138
+ rubyforge_project:
139
+ rubygems_version: 1.8.24
140
+ signing_key:
141
+ specification_version: 3
142
+ summary: ITunesCrawler provides an easy way to download the requested iTunes data
143
+ through Apple's Search API.
144
+ test_files:
145
+ - spec/spec_helper.rb
146
+ - spec/storefront_fetcher_spec.rb
147
+ has_rdoc: