pubchem 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/README.markdown +23 -0
- data/Rakefile +1 -0
- data/bin/console +13 -0
- data/bin/setup +7 -0
- data/example.rb +7 -0
- data/exe/.gitkeep +0 -0
- data/lib/pubchem.rb +63 -0
- data/lib/pubchem/version.rb +3 -0
- data/pubchem.gemspec +28 -0
- data/run +2 -0
- metadata +87 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: cc1a04a9f940becd4f4eff582d8105d6f3772eed
|
4
|
+
data.tar.gz: 224b9440fe38fcfa39fe9b360a2f32a9e145b27f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6f72420e95796c668a1154877ef5ad2455569e00fbf618ee2ebfc5256433fbcf0cb0471d67ecf2044bce075f0dbf9a59f769610026620936a74dc488fa8a0e22
|
7
|
+
data.tar.gz: 297ba5d561ed323425c6c5804eceb035d17baf2c8865acf7b4bade28ccb544b32dd79a6e9f319c92acc5bdd45e1e0d1a075648779fcd1598104d36db34acc62a
|
data/Gemfile
ADDED
data/README.markdown
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Pubchem image downloader
|
2
|
+
|
3
|
+
For getting all that juicy substance and compound data from Pubchem.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
`apt-get install wget`
|
8
|
+
|
9
|
+
Or
|
10
|
+
|
11
|
+
`sudo apt-get install wget`
|
12
|
+
|
13
|
+
`gem install pubchem`
|
14
|
+
|
15
|
+
## Usage
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
pubchem = Pubchem.new
|
19
|
+
|
20
|
+
pubchem.get_ids([16,405], "~/yay.zip")
|
21
|
+
|
22
|
+
puts "Do a happy dance!"
|
23
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/console
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "bundler/setup"
|
3
|
+
require "pubchem"
|
4
|
+
|
5
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
6
|
+
# with your gem easier. You can also use a different console, if you like.
|
7
|
+
|
8
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
9
|
+
# require "pry"
|
10
|
+
# Pry.start
|
11
|
+
|
12
|
+
require "irb"
|
13
|
+
IRB.start
|
data/bin/setup
ADDED
data/example.rb
ADDED
data/exe/.gitkeep
ADDED
File without changes
|
data/lib/pubchem.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
|
3
|
+
# Downloads PubChem records through the pc_fetch web form.
#
# The form is submitted with a list of record IDs; the page that comes back is
# re-requested until it exposes an FTP link, and that link is then fetched
# with the external `wget` program.
class Pubchem

  # Address of the pc_fetch form that #get_ids loads and submits.
  FETCH_URL = 'https://pubchem.ncbi.nlm.nih.gov/pc_fetch/pc_fetch.cgi'.freeze

  # Matches any character that is NOT allowed in the download URL.
  #
  # We don't want to allow scary characters into the URL handed to wget since
  # that is a security risk, so only these are accepted:
  #   lower and upper case letters, numbers,
  #   / forward slashes
  #   : colons
  #   . periods
  #   - dashes
  FORBIDDEN_URL_CHARACTER = /[^a-zA-Z0-9\/\:\.\-]/u

  # The agent used for every HTTP request.
  attr_accessor :agent

  # @param agent [Object, nil] agent to use for requests; when nil a new
  #   Mechanize agent with follow_meta_refresh enabled is created.
  def initialize(agent=nil)
    @agent = agent
    @agent ||= Mechanize.new { |mech| mech.follow_meta_refresh = true }
  end

  # Requests a download for +ids+ and saves it to +filename+ using wget.
  #
  # @param ids [Array, String, Object] record IDs; anything Array() accepts
  #   is joined with commas, so an already comma-separated String passes
  #   through unchanged.
  # @param filename [String] destination path, expanded with
  #   File.expand_path (so "~" is allowed).
  # @param db [Symbol, String] prefixed with "pc" and assigned to the form's
  #   +db+ field.
  # @param retrieve_mode [Symbol, String] assigned to the form's +retmode+
  #   field.
  # @param delay [Numeric, nil] seconds to sleep before each poll; when nil a
  #   value between 0.875 and 1.375 is picked once and reused.
  # @return [Object] whatever @agent.get returns for the form page.
  # @raise [RuntimeError] when the response offers neither an FTP link nor a
  #   "pc_fetch.cgi?reqid" link to poll, when the FTP link text contains a
  #   forbidden character, or when wget does not exit successfully.
  def get_ids(ids,
              filename,
              db: :compound,
              retrieve_mode: :image,
              delay: nil)

    ids = Array(ids).join(",")
    filename = File.expand_path(filename)

    @agent.get(FETCH_URL) do |page|

      response = page.form() do |form|
        form.idstr = ids
        form.retmode = retrieve_mode.to_s
        form.db = "pc#{db}"
      end.submit

      ftp_link = ftp_link_on(response)

      # The result is not ready until an FTP link shows up, so keep following
      # the request-status link until it does.
      until ftp_link
        delay ||= 0.875 + rand / 2
        sleep(delay)

        response = next_status_page(response)
        ftp_link = ftp_link_on(response)
      end

      download(ftp_link, filename)

    end
  end

  private

  # Returns the first link on +page+ whose URI scheme is "ftp", or nil.
  def ftp_link_on(page)
    page.links.find do |link|
      # Link#uri can be nil (e.g. an anchor with no href); skip those rather
      # than crash on nil.scheme.
      uri = link.uri
      uri && uri.scheme == "ftp"
    end
  end

  # Follows the "pc_fetch.cgi?reqid" link on +page+ and returns the new page.
  # Raises instead of handing nil to the agent when no such link exists.
  def next_status_page(page)
    reqid_link = page.links.find { |link| link.to_s.start_with? "pc_fetch.cgi?reqid" }
    raise "No FTP link or pc_fetch.cgi?reqid link found in the response" unless reqid_link

    @agent.get(reqid_link)
  end

  # Validates the text of +ftp_link+ and downloads it to +filename+ via wget.
  def download(ftp_link, filename)
    ftp_url = ftp_link.to_s

    # Checked without gsub! so the link's own string is never mutated.
    raise "Invalid character detected" if ftp_url =~ FORBIDDEN_URL_CHARACTER

    # system returns false on a non-zero exit and nil when wget cannot be run.
    return if system("wget", ftp_url, "-O", filename)

    raise "wget failed to download #{ftp_url} to #{filename}"
  end

end
|
data/pubchem.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'pubchem/version'
|
5
|
+
|
6
|
+
# Gem specification for pubchem: identity, packaged files, and dependencies.
Gem::Specification.new do |spec|
  spec.name          = "pubchem"
  spec.version       = Pubchem::VERSION
  spec.authors       = ["Zach Aysan"]
  spec.email         = ["zachaysan@gmail.com"]

  spec.summary       = %q{ Collect Pubchem substance and compound data }
  spec.description   = %q{ While there is a great FTP mirror for
molecule data, it is hard to deal with
their form. This helps with that!}
  spec.homepage      = "https://github.com/zachaysan/pubchem"
  spec.license       = "MIT"

  # Package every file tracked by git except test/spec/features directories.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  # Skip dotfiles such as exe/.gitkeep: the placeholder only keeps the empty
  # directory in git and must not be registered as an executable.
  spec.executables   = spec.files.grep(%r{^exe/[^.]}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "mechanize", "~> 2.7.3"

  spec.add_development_dependency "bundler", "~> 1.10"

end
|
metadata
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pubchem
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.5
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Zach Aysan
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-09-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.7.3
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.7.3
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.10'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.10'
|
41
|
+
description: |2-
|
42
|
+
While there is a great FTP mirror for
|
43
|
+
molecule data, it is hard to deal with
|
44
|
+
their form. This helps with that!
|
45
|
+
email:
|
46
|
+
- zachaysan@gmail.com
|
47
|
+
executables:
|
48
|
+
- ".gitkeep"
|
49
|
+
extensions: []
|
50
|
+
extra_rdoc_files: []
|
51
|
+
files:
|
52
|
+
- Gemfile
|
53
|
+
- README.markdown
|
54
|
+
- Rakefile
|
55
|
+
- bin/console
|
56
|
+
- bin/setup
|
57
|
+
- example.rb
|
58
|
+
- exe/.gitkeep
|
59
|
+
- lib/pubchem.rb
|
60
|
+
- lib/pubchem/version.rb
|
61
|
+
- pubchem.gemspec
|
62
|
+
- run
|
63
|
+
homepage: https://github.com/zachaysan/pubchem
|
64
|
+
licenses:
|
65
|
+
- MIT
|
66
|
+
metadata: {}
|
67
|
+
post_install_message:
|
68
|
+
rdoc_options: []
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: '0'
|
81
|
+
requirements: []
|
82
|
+
rubyforge_project:
|
83
|
+
rubygems_version: 2.4.7
|
84
|
+
signing_key:
|
85
|
+
specification_version: 4
|
86
|
+
summary: Collect Pubchem substance and compound data
|
87
|
+
test_files: []
|