tomosia_amanaplus_crawl 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- metadata +4 -17
- data/.gitignore +0 -11
- data/.rspec +0 -3
- data/.travis.yml +0 -6
- data/CODE_OF_CONDUCT.md +0 -74
- data/Gemfile +0 -11
- data/LICENSE.txt +0 -21
- data/README.md +0 -45
- data/Rakefile +0 -6
- data/bin/console +0 -14
- data/bin/setup +0 -8
- data/lib/tomosia_amanaplus_crawl.rb +0 -93
- data/lib/tomosia_amanaplus_crawl/version.rb +0 -3
- data/tomosia_amanaplus_crawl.gemspec +0 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04e624b0298728425799e2be8ff9b66d6b005494c33ee995da7bef56e964f43c
|
4
|
+
data.tar.gz: d0cfbb83e80444c55784f4d3f82e39004cea67af8d38f578de3ae2a60b660209
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 965b25db27a2b6fa9b3e4d71f85d9a3f5d73cf18822ce49eda30de6afdd667c1e338bd973bbd604824282932874e0cf97284be8665f86b9b7a2acc68a3c1d9cf
|
7
|
+
data.tar.gz: 60a4b6b544799479951966b4c595be812d08759809b00a60094082270d13a9038728e208b6afffa6364daefb8c4a7805da592b4ff41eb38b6f64c598e797da43
|
metadata
CHANGED
@@ -1,34 +1,21 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tomosia_amanaplus_crawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nhat Huy
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: tomosia_amanaplus_crawl demo project crawl du lieu.
|
14
|
-
email: nhathuych@tomosia.com
|
14
|
+
email: nhathuych@gmail.com
|
15
15
|
executables: []
|
16
16
|
extensions: []
|
17
17
|
extra_rdoc_files: []
|
18
|
-
files:
|
19
|
-
- ".gitignore"
|
20
|
-
- ".rspec"
|
21
|
-
- ".travis.yml"
|
22
|
-
- CODE_OF_CONDUCT.md
|
23
|
-
- Gemfile
|
24
|
-
- LICENSE.txt
|
25
|
-
- README.md
|
26
|
-
- Rakefile
|
27
|
-
- bin/console
|
28
|
-
- bin/setup
|
29
|
-
- lib/tomosia_amanaplus_crawl.rb
|
30
|
-
- lib/tomosia_amanaplus_crawl/version.rb
|
31
|
-
- tomosia_amanaplus_crawl.gemspec
|
18
|
+
files: []
|
32
19
|
homepage: https://github.com/tthuydang/tomosia_amanaplus_crawl
|
33
20
|
licenses:
|
34
21
|
- MIT
|
data/.gitignore
DELETED
data/.rspec
DELETED
data/.travis.yml
DELETED
data/CODE_OF_CONDUCT.md
DELETED
@@ -1,74 +0,0 @@
|
|
1
|
-
# Contributor Covenant Code of Conduct
|
2
|
-
|
3
|
-
## Our Pledge
|
4
|
-
|
5
|
-
In the interest of fostering an open and welcoming environment, we as
|
6
|
-
contributors and maintainers pledge to making participation in our project and
|
7
|
-
our community a harassment-free experience for everyone, regardless of age, body
|
8
|
-
size, disability, ethnicity, gender identity and expression, level of experience,
|
9
|
-
nationality, personal appearance, race, religion, or sexual identity and
|
10
|
-
orientation.
|
11
|
-
|
12
|
-
## Our Standards
|
13
|
-
|
14
|
-
Examples of behavior that contributes to creating a positive environment
|
15
|
-
include:
|
16
|
-
|
17
|
-
* Using welcoming and inclusive language
|
18
|
-
* Being respectful of differing viewpoints and experiences
|
19
|
-
* Gracefully accepting constructive criticism
|
20
|
-
* Focusing on what is best for the community
|
21
|
-
* Showing empathy towards other community members
|
22
|
-
|
23
|
-
Examples of unacceptable behavior by participants include:
|
24
|
-
|
25
|
-
* The use of sexualized language or imagery and unwelcome sexual attention or
|
26
|
-
advances
|
27
|
-
* Trolling, insulting/derogatory comments, and personal or political attacks
|
28
|
-
* Public or private harassment
|
29
|
-
* Publishing others' private information, such as a physical or electronic
|
30
|
-
address, without explicit permission
|
31
|
-
* Other conduct which could reasonably be considered inappropriate in a
|
32
|
-
professional setting
|
33
|
-
|
34
|
-
## Our Responsibilities
|
35
|
-
|
36
|
-
Project maintainers are responsible for clarifying the standards of acceptable
|
37
|
-
behavior and are expected to take appropriate and fair corrective action in
|
38
|
-
response to any instances of unacceptable behavior.
|
39
|
-
|
40
|
-
Project maintainers have the right and responsibility to remove, edit, or
|
41
|
-
reject comments, commits, code, wiki edits, issues, and other contributions
|
42
|
-
that are not aligned to this Code of Conduct, or to ban temporarily or
|
43
|
-
permanently any contributor for other behaviors that they deem inappropriate,
|
44
|
-
threatening, offensive, or harmful.
|
45
|
-
|
46
|
-
## Scope
|
47
|
-
|
48
|
-
This Code of Conduct applies both within project spaces and in public spaces
|
49
|
-
when an individual is representing the project or its community. Examples of
|
50
|
-
representing a project or community include using an official project e-mail
|
51
|
-
address, posting via an official social media account, or acting as an appointed
|
52
|
-
representative at an online or offline event. Representation of a project may be
|
53
|
-
further defined and clarified by project maintainers.
|
54
|
-
|
55
|
-
## Enforcement
|
56
|
-
|
57
|
-
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
58
|
-
reported by contacting the project team at tt.huy.dang@tomosia.com. All
|
59
|
-
complaints will be reviewed and investigated and will result in a response that
|
60
|
-
is deemed necessary and appropriate to the circumstances. The project team is
|
61
|
-
obligated to maintain confidentiality with regard to the reporter of an incident.
|
62
|
-
Further details of specific enforcement policies may be posted separately.
|
63
|
-
|
64
|
-
Project maintainers who do not follow or enforce the Code of Conduct in good
|
65
|
-
faith may face temporary or permanent repercussions as determined by other
|
66
|
-
members of the project's leadership.
|
67
|
-
|
68
|
-
## Attribution
|
69
|
-
|
70
|
-
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
|
71
|
-
available at [https://contributor-covenant.org/version/1/4][version]
|
72
|
-
|
73
|
-
[homepage]: https://contributor-covenant.org
|
74
|
-
[version]: https://contributor-covenant.org/version/1/4/
|
data/Gemfile
DELETED
data/LICENSE.txt
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
The MIT License (MIT)
|
2
|
-
|
3
|
-
Copyright (c) 2020 tthuydang
|
4
|
-
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
7
|
-
in the Software without restriction, including without limitation the rights
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
10
|
-
furnished to do so, subject to the following conditions:
|
11
|
-
|
12
|
-
The above copyright notice and this permission notice shall be included in
|
13
|
-
all copies or substantial portions of the Software.
|
14
|
-
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
-
THE SOFTWARE.
|
data/README.md
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
# TomosiaAmanaplusCrawl
|
2
|
-
|
3
|
-
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/tomosia_amanaplus_crawl`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
6
|
-
|
7
|
-
## Installation
|
8
|
-
|
9
|
-
Add this line to your application's Gemfile:
|
10
|
-
|
11
|
-
```ruby
|
12
|
-
gem 'tomosia_amanaplus_crawl'
|
13
|
-
```
|
14
|
-
|
15
|
-
And then execute:
|
16
|
-
|
17
|
-
$ bundle install
|
18
|
-
|
19
|
-
Or install it yourself as:
|
20
|
-
|
21
|
-
$ gem install tomosia_amanaplus_crawl
|
22
|
-
|
23
|
-
## Usage
|
24
|
-
|
25
|
-
TODO: Write usage instructions here
|
26
|
-
|
27
|
-
## Development
|
28
|
-
|
29
|
-
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
-
|
31
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
-
|
33
|
-
## Contributing
|
34
|
-
|
35
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/tomosia_amanaplus_crawl. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/tomosia_amanaplus_crawl/blob/master/CODE_OF_CONDUCT.md).
|
36
|
-
|
37
|
-
|
38
|
-
## License
|
39
|
-
|
40
|
-
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
41
|
-
|
42
|
-
## Code of Conduct
|
43
|
-
|
44
|
-
Everyone interacting in the TomosiaAmanaplusCrawl project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/tomosia_amanaplus_crawl/blob/master/CODE_OF_CONDUCT.md).
|
45
|
-
# tomosia_amanaplus_crawl
|
data/Rakefile
DELETED
data/bin/console
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require "bundler/setup"
|
4
|
-
require "tomosia_amanaplus_crawl"
|
5
|
-
|
6
|
-
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
-
# with your gem easier. You can also use a different console, if you like.
|
8
|
-
|
9
|
-
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
# require "pry"
|
11
|
-
# Pry.start
|
12
|
-
|
13
|
-
require "irb"
|
14
|
-
IRB.start(__FILE__)
|
data/bin/setup
DELETED
data/lib/tomosia_amanaplus_crawl.rb
DELETED
@@ -1,93 +0,0 @@
|
|
1
|
-
require "tomosia_amanaplus_crawl/version"
|
2
|
-
require 'nokogiri'
|
3
|
-
require 'httparty'
|
4
|
-
require 'open-uri'
|
5
|
-
require 'fileutils'
|
6
|
-
require 'spreadsheet'
|
7
|
-
|
8
|
-
module TomosiaAmanaplusCrawl
|
9
|
-
class Crawler
|
10
|
-
URL = "https://plus.amanaimages.com/items/search/"
|
11
|
-
|
12
|
-
def run(keyword, destination)
|
13
|
-
unparsed_page = HTTParty.get("#{URL}/#{keyword}")
|
14
|
-
parsed_page = Nokogiri::HTML(unparsed_page)
|
15
|
-
|
16
|
-
pages = parsed_page.css("div.c-paginate__nums").css('a').last.text.to_i # tổng số page
|
17
|
-
images_listings = parsed_page.css("div.p-search-result__body") # danh sách các thẻ div chứa image
|
18
|
-
|
19
|
-
images = getPaginationImages(images_listings, pages, keyword)
|
20
|
-
downloadImages(images, destination)
|
21
|
-
writeToExcel(images, destination)
|
22
|
-
end
|
23
|
-
|
24
|
-
def getPaginationImages(images_listings, pages, keyword) # lấy tất cả image của các page cộng lại
|
25
|
-
images = Array.new
|
26
|
-
i = 0
|
27
|
-
curr_page = 1
|
28
|
-
while curr_page <= pages
|
29
|
-
puts "Crawling page #{curr_page}..........."
|
30
|
-
|
31
|
-
pagination_unparsed_page = HTTParty.get("https://plus.amanaimages.com/items/search/#{keyword}?page=#{curr_page}")
|
32
|
-
pagination_parsed_page = Nokogiri::HTML(pagination_unparsed_page)
|
33
|
-
pagination_images_listings = pagination_parsed_page.css("div.p-item-thumb")
|
34
|
-
|
35
|
-
pagination_images_listings.each do |img|
|
36
|
-
src = img.css('img').attr('data-src').nil? == true ? img.css('img').attr('src') : img.css('img').attr('data-src')
|
37
|
-
current_image = {
|
38
|
-
title: img.css('a')[1].attr('title'),
|
39
|
-
url: src.to_s,
|
40
|
-
size: 'unknow',
|
41
|
-
extension: ".#{src.to_s.split('.').last}"
|
42
|
-
}
|
43
|
-
images << current_image
|
44
|
-
# puts "#{i += 1}: #{src}"
|
45
|
-
end
|
46
|
-
|
47
|
-
curr_page += 1
|
48
|
-
end
|
49
|
-
images
|
50
|
-
end
|
51
|
-
|
52
|
-
# tải hình và cập nhật lại size
|
53
|
-
def downloadImages(images, destination)
|
54
|
-
path = "#{destination}/Downloads" # lưu hình ở folder Downloads
|
55
|
-
Dir.mkdir path unless File.exist? path
|
56
|
-
|
57
|
-
threads = []
|
58
|
-
print "\nDownloading"
|
59
|
-
images.each do |curr_image|
|
60
|
-
threads << Thread.new(curr_image) {
|
61
|
-
open(curr_image[:url]) do |image|
|
62
|
-
File.open("#{path}/#{curr_image[:url].split('/').last}", "a+") do |file|
|
63
|
-
file.write(image.read) # lưu hình ảnh
|
64
|
-
curr_image[:size] = image.size # cập nhật lại size trong mảng images
|
65
|
-
print "."
|
66
|
-
end
|
67
|
-
end # end open
|
68
|
-
}
|
69
|
-
end
|
70
|
-
threads.each { |t| t.join }
|
71
|
-
puts "\nDownloaded."
|
72
|
-
end
|
73
|
-
|
74
|
-
def writeToExcel(images, destination)
|
75
|
-
path = "#{destination}/File Excel" # lưu file ở folder File Excel
|
76
|
-
Dir.mkdir path unless File.exist? path
|
77
|
-
|
78
|
-
book = Spreadsheet::Workbook.new
|
79
|
-
sheet1 = book.create_worksheet
|
80
|
-
|
81
|
-
i = 0
|
82
|
-
sheet1.row(0).concat %w{Title Url Size(bytes) Extension}
|
83
|
-
puts "Writing..........."
|
84
|
-
images.each do |img|
|
85
|
-
sheet1.row(i += 1).push img[:title], img[:url], img[:size], img[:extension]
|
86
|
-
end
|
87
|
-
puts "Writed."
|
88
|
-
|
89
|
-
book.write "#{path}/YeuNgucLep.xls"
|
90
|
-
end
|
91
|
-
|
92
|
-
end
|
93
|
-
end
|
data/lib/tomosia_amanaplus_crawl/version.rb
DELETED
data/tomosia_amanaplus_crawl.gemspec
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
require_relative 'lib/tomosia_amanaplus_crawl/version'
|
2
|
-
|
3
|
-
Gem::Specification.new do |spec|
|
4
|
-
spec.name = "tomosia_amanaplus_crawl"
|
5
|
-
spec.version = TomosiaAmanaplusCrawl::VERSION
|
6
|
-
spec.authors = "Nhat Huy"
|
7
|
-
spec.email = "nhathuych@tomosia.com"
|
8
|
-
|
9
|
-
spec.summary = %q{tomosia_amanaplus_crawl demo project crawl du lieu.}
|
10
|
-
spec.description = %q{tomosia_amanaplus_crawl demo project crawl du lieu.}
|
11
|
-
spec.homepage = "https://github.com/tthuydang/tomosia_amanaplus_crawl"
|
12
|
-
spec.license = "MIT"
|
13
|
-
spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
|
14
|
-
|
15
|
-
# spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
|
16
|
-
|
17
|
-
# spec.metadata["homepage_uri"] = spec.homepage
|
18
|
-
# spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here."
|
19
|
-
# spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
|
20
|
-
|
21
|
-
# Specify which files should be added to the gem when it is released.
|
22
|
-
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
23
|
-
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
24
|
-
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
25
|
-
end
|
26
|
-
spec.bindir = "exe"
|
27
|
-
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
28
|
-
spec.require_paths = ["lib"]
|
29
|
-
end
|