pubchem 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/README.markdown +23 -0
- data/Rakefile +1 -0
- data/bin/console +13 -0
- data/bin/setup +7 -0
- data/example.rb +7 -0
- data/exe/.gitkeep +0 -0
- data/lib/pubchem.rb +63 -0
- data/lib/pubchem/version.rb +3 -0
- data/pubchem.gemspec +28 -0
- data/run +2 -0
- metadata +87 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: cc1a04a9f940becd4f4eff582d8105d6f3772eed
|
4
|
+
data.tar.gz: 224b9440fe38fcfa39fe9b360a2f32a9e145b27f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6f72420e95796c668a1154877ef5ad2455569e00fbf618ee2ebfc5256433fbcf0cb0471d67ecf2044bce075f0dbf9a59f769610026620936a74dc488fa8a0e22
|
7
|
+
data.tar.gz: 297ba5d561ed323425c6c5804eceb035d17baf2c8865acf7b4bade28ccb544b32dd79a6e9f319c92acc5bdd45e1e0d1a075648779fcd1598104d36db34acc62a
|
data/Gemfile
ADDED
data/README.markdown
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Pubchem image downloader
|
2
|
+
|
3
|
+
For getting all that juicy substance and compound data from Pubchem.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
`apt-get install wget`
|
8
|
+
|
9
|
+
Or, if root privileges are required:
|
10
|
+
|
11
|
+
`sudo apt-get install wget`
|
12
|
+
|
13
|
+
Then install the gem:

`gem install pubchem`
|
14
|
+
|
15
|
+
## Usage
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
pubchem = Pubchem.new
|
19
|
+
|
20
|
+
pubchem.get_ids([16,405], "~/yay.zip")
|
21
|
+
|
22
|
+
puts "Do a happy dance!"
|
23
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/console
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "bundler/setup"
|
3
|
+
require "pubchem"
|
4
|
+
|
5
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
6
|
+
# with your gem easier. You can also use a different console, if you like.
|
7
|
+
|
8
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
9
|
+
# require "pry"
|
10
|
+
# Pry.start
|
11
|
+
|
12
|
+
require "irb"
|
13
|
+
IRB.start
|
data/bin/setup
ADDED
data/example.rb
ADDED
data/exe/.gitkeep
ADDED
File without changes
|
data/lib/pubchem.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
|
3
|
+
# Client for PubChem's pc_fetch download form.
#
# Fills in and submits the pc_fetch web form through a Mechanize-style
# agent, polls the returned page until an FTP link appears, and then
# downloads that link with the `wget` command-line tool.
class Pubchem

  attr_accessor :agent

  # agent - optional agent object used for all HTTP requests. When it is
  #         nil (or false) a new Mechanize agent with follow_meta_refresh
  #         enabled is created instead.
  def initialize(agent=nil)
    # The block parameter is deliberately not called `agent`, so it does
    # not shadow the method parameter.
    @agent = agent || Mechanize.new { |mechanize|
      mechanize.follow_meta_refresh = true
    }
  end

  # Requests the given ids from pc_fetch and saves the result to filename.
  #
  # ids           - an Array of ids (joined with ",") or any other value,
  #                 which is assigned to the form's idstr field as-is.
  # filename      - destination path; expanded with File.expand_path, so
  #                 "~" is supported.
  # db            - database suffix; the form's db field is set to
  #                 "pc#{db}" (default :compound => "pccompound").
  # retrieve_mode - value for the form's retmode field (default :image).
  # delay         - seconds to sleep between polls. When nil, a random
  #                 value in [0.875, 1.375) is picked on the first poll and
  #                 reused for every later poll.
  #
  # Returns whatever @agent.get returns for the initial form page.
  # Raises RuntimeError when no FTP link or request-status link can be
  # found, when the FTP URL contains a disallowed character, or when wget
  # does not exit successfully.
  def get_ids(ids,
              filename,
              db: :compound,
              retrieve_mode: :image,
              delay: nil)

    ids = ids.join(",") if ids.is_a? Array
    filename = File.expand_path(filename)

    @agent.get('https://pubchem.ncbi.nlm.nih.gov/pc_fetch/pc_fetch.cgi') do |page|

      response = page.form() do |form|
        form.idstr = ids
        form.retmode = retrieve_mode.to_s
        form.db = "pc#{db}"
      end.submit

      ftp_link = find_ftp_link(response)

      # The result is not always ready immediately: keep following the
      # request-status link until a page carrying an FTP link comes back.
      until ftp_link
        delay ||= 0.875 + rand / 2
        sleep(delay)

        reqid_link = response.links.find { |l| l.to_s.start_with? "pc_fetch.cgi?reqid" }
        # Without a status link there is nothing left to poll; fail with a
        # clear message instead of passing nil to the agent.
        raise "No FTP link or pc_fetch.cgi?reqid link found in PubChem response" unless reqid_link

        response = @agent.get(reqid_link)
        ftp_link = find_ftp_link(response)
      end

      ftp_url = validated_ftp_url(ftp_link)

      # system returns false on a non-zero exit status and nil when the
      # command could not be run at all (e.g. wget is not installed).
      unless system("wget", ftp_url, "-O", filename)
        raise "wget failed to download #{ftp_url}"
      end

    end
  end

  private

  # Returns the first link of response whose URI scheme is "ftp", or nil.
  # Links whose uri is nil are skipped rather than raising NoMethodError.
  def find_ftp_link(response)
    response.links.find do |l|
      uri = l.uri
      uri && uri.scheme == "ftp"
    end
  end

  # Returns the link's string form after checking that it is safe to hand
  # to wget.
  #
  # We don't want to allow scary characters into our URL since it is a
  # security risk, so we only allow lower and upper case letters, numbers,
  # / forward slashes
  # : colons
  # . periods
  # - dashes
  #
  # NOTE(review): this relies on the link's to_s being the URL itself --
  # confirm against the Mechanize link class in use.
  def validated_ftp_url(ftp_link)
    ftp_url = ftp_link.to_s
    raise "Invalid character detected" if ftp_url =~ /[^a-zA-Z0-9\/\:\.\-]/u
    ftp_url
  end

end
|
data/pubchem.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'pubchem/version'
|
5
|
+
|
6
|
+
# Gem specification for pubchem. The version comes from Pubchem::VERSION
# and the file list is taken from `git ls-files`.
Gem::Specification.new do |spec|
  spec.name = "pubchem"
  spec.version = Pubchem::VERSION
  spec.authors = ["Zach Aysan"]
  spec.email = ["zachaysan@gmail.com"]

  spec.summary = %q{ Collect Pubchem substance and compound data }
  # Whitespace inside %q{} is part of the string, so the continuation
  # lines are left exactly as they appear rather than re-indented.
  spec.description = %q{ While there is a great FTP mirror for
molecule data, it is hard to deal with
their form. This helps with that!}
  spec.homepage = "https://github.com/zachaysan/pubchem"
  spec.license = "MIT"

  # Package every tracked file except test/spec/features directories.
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir = "exe"
  # Every file under exe/ is an executable, except dotfiles: exe/.gitkeep
  # only keeps the directory in git and must not be declared as a command.
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }.reject { |name| name.start_with?(".") }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "mechanize", "~> 2.7.3"

  spec.add_development_dependency "bundler", "~> 1.10"

end
|
metadata
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pubchem
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.5
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Zach Aysan
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-09-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.7.3
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.7.3
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.10'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.10'
|
41
|
+
description: |2-
|
42
|
+
While there is a great FTP mirror for
|
43
|
+
molecule data, it is hard to deal with
|
44
|
+
their form. This helps with that!
|
45
|
+
email:
|
46
|
+
- zachaysan@gmail.com
|
47
|
+
executables:
|
48
|
+
- ".gitkeep"
|
49
|
+
extensions: []
|
50
|
+
extra_rdoc_files: []
|
51
|
+
files:
|
52
|
+
- Gemfile
|
53
|
+
- README.markdown
|
54
|
+
- Rakefile
|
55
|
+
- bin/console
|
56
|
+
- bin/setup
|
57
|
+
- example.rb
|
58
|
+
- exe/.gitkeep
|
59
|
+
- lib/pubchem.rb
|
60
|
+
- lib/pubchem/version.rb
|
61
|
+
- pubchem.gemspec
|
62
|
+
- run
|
63
|
+
homepage: https://github.com/zachaysan/pubchem
|
64
|
+
licenses:
|
65
|
+
- MIT
|
66
|
+
metadata: {}
|
67
|
+
post_install_message:
|
68
|
+
rdoc_options: []
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: '0'
|
81
|
+
requirements: []
|
82
|
+
rubyforge_project:
|
83
|
+
rubygems_version: 2.4.7
|
84
|
+
signing_key:
|
85
|
+
specification_version: 4
|
86
|
+
summary: Collect Pubchem substance and compound data
|
87
|
+
test_files: []
|