openc_bot 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.travis.yml +8 -0
- data/CHANGELOG.md +2 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +253 -0
- data/Rakefile +14 -0
- data/bin/openc_bot +13 -0
- data/create_bot.sh +30 -0
- data/create_company_bot.sh +16 -0
- data/create_simple_licence_bot.sh +31 -0
- data/db/.gitkeep +0 -0
- data/examples/basic/.gitignore +3 -0
- data/examples/basic/Gemfile +7 -0
- data/examples/basic/config.yml +21 -0
- data/examples/basic/lib/basic.rb +88 -0
- data/examples/basic_with_proxy/Gemfile +7 -0
- data/examples/basic_with_proxy/config.yml +21 -0
- data/examples/basic_with_proxy/lib/basic_with_proxy.rb +103 -0
- data/examples/bot_with_simple_iterator/Gemfile +6 -0
- data/examples/bot_with_simple_iterator/config.yml +21 -0
- data/examples/bot_with_simple_iterator/lib/bot_with_simple_iterator.rb +112 -0
- data/examples/company_fetchers/basic.rb +49 -0
- data/lib/monkey_patches/mechanize.rb +53 -0
- data/lib/openc_bot.rb +89 -0
- data/lib/openc_bot/bot_data_validator.rb +18 -0
- data/lib/openc_bot/company_fetcher_bot.rb +40 -0
- data/lib/openc_bot/exceptions.rb +17 -0
- data/lib/openc_bot/helpers/_csv.rb +10 -0
- data/lib/openc_bot/helpers/alpha_search.rb +73 -0
- data/lib/openc_bot/helpers/dates.rb +33 -0
- data/lib/openc_bot/helpers/html.rb +8 -0
- data/lib/openc_bot/helpers/incremental_search.rb +106 -0
- data/lib/openc_bot/helpers/register_methods.rb +205 -0
- data/lib/openc_bot/helpers/text.rb +18 -0
- data/lib/openc_bot/incrementers.rb +2 -0
- data/lib/openc_bot/incrementers/base.rb +214 -0
- data/lib/openc_bot/incrementers/common.rb +47 -0
- data/lib/openc_bot/tasks.rb +385 -0
- data/lib/openc_bot/templates/README.md +35 -0
- data/lib/openc_bot/templates/bin/export_data +28 -0
- data/lib/openc_bot/templates/bin/fetch_data +23 -0
- data/lib/openc_bot/templates/bin/verify_data +1 -0
- data/lib/openc_bot/templates/config.yml +21 -0
- data/lib/openc_bot/templates/lib/bot.rb +43 -0
- data/lib/openc_bot/templates/lib/company_fetcher_bot.rb +95 -0
- data/lib/openc_bot/templates/lib/simple_bot.rb +67 -0
- data/lib/openc_bot/templates/spec/bot_spec.rb +11 -0
- data/lib/openc_bot/templates/spec/simple_bot_spec.rb +11 -0
- data/lib/openc_bot/templates/spec/spec_helper.rb +13 -0
- data/lib/openc_bot/version.rb +3 -0
- data/lib/simple_openc_bot.rb +289 -0
- data/openc_bot.gemspec +35 -0
- data/schemas/company-schema.json +112 -0
- data/schemas/includes/address.json +23 -0
- data/schemas/includes/base-statement.json +27 -0
- data/schemas/includes/company.json +14 -0
- data/schemas/includes/filing.json +20 -0
- data/schemas/includes/license-data.json +27 -0
- data/schemas/includes/officer.json +14 -0
- data/schemas/includes/previous_name.json +11 -0
- data/schemas/includes/share-parcel-data.json +67 -0
- data/schemas/includes/share-parcel.json +60 -0
- data/schemas/includes/subsidiary-relationship-data.json +52 -0
- data/schemas/includes/total-shares.json +10 -0
- data/schemas/licence-schema.json +21 -0
- data/schemas/share-parcel-schema.json +21 -0
- data/schemas/subsidiary-relationship-schema.json +19 -0
- data/spec/dummy_classes/foo_bot.rb +4 -0
- data/spec/lib/bot_data_validator_spec.rb +69 -0
- data/spec/lib/company_fetcher_bot_spec.rb +93 -0
- data/spec/lib/exceptions_spec.rb +25 -0
- data/spec/lib/helpers/alpha_search_spec.rb +173 -0
- data/spec/lib/helpers/dates_spec.rb +65 -0
- data/spec/lib/helpers/incremental_search_spec.rb +471 -0
- data/spec/lib/helpers/register_methods_spec.rb +558 -0
- data/spec/lib/helpers/text_spec.rb +50 -0
- data/spec/lib/openc_bot/db/.gitkeep +0 -0
- data/spec/lib/openc_bot/incrementers/common_spec.rb +83 -0
- data/spec/lib/openc_bot_spec.rb +116 -0
- data/spec/schemas/company-schema_spec.rb +676 -0
- data/spec/simple_openc_bot_spec.rb +302 -0
- data/spec/spec_helper.rb +19 -0
- metadata +300 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2c2492325f145ede40b77b666ab93b99c47ba314
|
4
|
+
data.tar.gz: 7aa0dd5faf896d3a7e6a2217092ced8227084e8b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9c2709f8c3cb91d06d6e356809e8adc2e16dd5499b5c85e217fe6637c2c4045b6e6add769ffded9844227ef16a04ea387f0d252a4d4ffeb80fd6cea5876f4faf
|
7
|
+
data.tar.gz: 94d5a2d6222a04164ee5f93cb266129790651f3aae587faa20c52af81d4fbdbb15d899b11eb42a2dd2f06aa075d80bf282e1a503d39b3400b07f9da016061271
|
data/.gitignore
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
.DS_Store
|
7
|
+
Gemfile.lock
|
8
|
+
InstalledFiles
|
9
|
+
_yardoc
|
10
|
+
coverage
|
11
|
+
doc/
|
12
|
+
lib/bundler/man
|
13
|
+
pkg
|
14
|
+
rdoc
|
15
|
+
spec/reports
|
16
|
+
test/tmp
|
17
|
+
test/version_tmp
|
18
|
+
tmp
|
19
|
+
*~
|
20
|
+
db/*
|
21
|
+
**/db/*
|
22
|
+
!.gitkeep
|
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
gem "sqlite_magic", :git => 'https://github.com/openc/sqlite_magic.git'
|
3
|
+
gem "pry", :group => [:development,:test]
|
4
|
+
# Specify your gem's dependencies in openc_bot.gemspec
|
5
|
+
gemspec
|
6
|
+
|
7
|
+
# we need to do pull request and bump version
|
8
|
+
# gem 'scraperwiki', '>=3.0.2', :git => 'https://github.com/openc/scraperwiki-ruby.git'
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Chris Taggart
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,253 @@
|
|
1
|
+
# OpencBot
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
|
5
|
+
This is a gem to allow bots to be written to fetch and format data
|
6
|
+
that can be easily imported into OpenCorporates, the largest openly
|
7
|
+
licensed database of companies in the world.
|
8
|
+
|
9
|
+
To start writing a new bot, run the following to create a skeleton bot:
|
10
|
+
|
11
|
+
```bash
|
12
|
+
mkdir your_bot_name
|
13
|
+
cd your_bot_name
|
14
|
+
curl -s https://raw.githubusercontent.com/openc/openc_bot/master/create_simple_licence_bot.sh | bash
|
15
|
+
```
|
16
|
+
|
17
|
+
The default bot doesn't scrape, it just outputs some dummy data. You can try:
|
18
|
+
|
19
|
+
* running the scrape with `bundle exec openc_bot rake bot:run`
|
20
|
+
* testing the validity of the data it will output with
|
21
|
+
`bundle exec openc_bot rake bot:test`
|
22
|
+
* viewing a sample of the data with `bundle exec openc_bot rake bot:spotcheck`
|
23
|
+
|
24
|
+
Take a look at the bot code created at
|
25
|
+
`your_bot_name/lib/your_bot_name.rb` and read the comments there to
|
26
|
+
start writing your own bot. Look at the example bots in the
|
27
|
+
`examples/` folder for inspiration, including how to scrape from a
|
28
|
+
website, and how to use "incrementers" to help with resumable,
|
29
|
+
incremental scrapes (see below for more).
|
30
|
+
|
31
|
+
These bots are all runnable; you should be able to `cd` to their
|
32
|
+
directory, run `bundle install`, and then `bundle exec openc_bot rake
|
33
|
+
bot:run`
|
34
|
+
|
35
|
+
You can write bots for any schemas we have defined
|
36
|
+
— see [SCHEMAS.md](./doc/SCHEMAS.md) for currently supported schemas.
|
37
|
+
|
38
|
+
When you are happy that your bot is finished, please update its
|
39
|
+
`README.md`, change the `enabled` flag in `config.yml` to be `true`,
|
40
|
+
and email us.
|
41
|
+
|
42
|
+
Please note that dates are a bit complicated, so we ask you to read
|
43
|
+
the bit about dates below carefully.
|
44
|
+
|
45
|
+
## About fetching and transforming data
|
46
|
+
|
47
|
+
As you'll see in the sample bot, bots have separate steps to fetch
|
48
|
+
data (the `fetch_all_records` method) and to transform it to a format
|
49
|
+
suitable for OpenCorporates (the `to_pipeline` method).
|
50
|
+
|
51
|
+
It is useful to have separate *fetch* and *export* phases for a couple
|
52
|
+
of reasons:
|
53
|
+
|
54
|
+
* For very large source datasets, it can take months to complete a
|
55
|
+
scrape. It is then useful to verify the data quality before
|
56
|
+
ingesting it in OpenCorporates.
|
57
|
+
* Often, datasets may include a load of potentially interesting data
|
58
|
+
which OpenCorporates doesn't yet support. It's worth storing this
|
59
|
+
data in an intermediate format, to save having to scrape it again in
|
60
|
+
the future. Please save anything like that and make a note of it in
|
61
|
+
your `README.md`.
|
62
|
+
|
63
|
+
For more complicated scrapers, you may wish to do things more manually
|
64
|
+
-- see [README-complex.md](./doc/README-complex.md) for more info.
|
65
|
+
|
66
|
+
# A few words about dates
|
67
|
+
|
68
|
+
There are three kinds of dates that OpenCorporates deals with:
|
69
|
+
|
70
|
+
1. The date on which an observation was true: the `sample_date`. This
|
71
|
+
is the date of a bot run, or a reporting date given in the source
|
72
|
+
document. Every observation **must have a sample date**.
|
73
|
+
2. A `start_date` and/or `end_date` defined explicitly in the source
|
74
|
+
document
|
75
|
+
3. A `start_date` or `end_date` that has not been provided by the
|
76
|
+
source, but which OpenCorporates can infer from one or more sample
|
77
|
+
dates. *In this case, you just supply a sample_date, and we do the
|
78
|
+
rest*
|
79
|
+
|
80
|
+
All dates should be in ISO8601 format.
|
81
|
+
|
82
|
+
## A few more words about dates
|
83
|
+
|
84
|
+
One of the important parts of the data format expected by
|
85
|
+
OpenCorporates are the dates a statement is known to be true.
|
86
|
+
|
87
|
+
All statements can be considered to be true between a start date and
|
88
|
+
an end date. Sources that make explicit statements like this are great
|
89
|
+
— but they're rare. For sources that don't explicitly define start and
|
90
|
+
end dates for statements, it is down to OpenCorporates to compute
|
91
|
+
these based on the bot's run schedule, and sample dates in the source
|
92
|
+
data.
|
93
|
+
|
94
|
+
Imagine you are interested in mining licenses in Liliput and
|
95
|
+
Brobdingnag, and you want to provide this data to OpenCorporates. You
|
96
|
+
find a website that lists mining licenses for these jurisdictions, so
|
97
|
+
you write a bot that can submit each license.
|
98
|
+
|
99
|
+
You find that Liliputian licenses have a defined start date and a
|
100
|
+
defined end date, which means you can explicitly say "this license is
|
101
|
+
valid between 1 June 2012 and 31 Aug 2013" for a particular license.
|
102
|
+
|
103
|
+
In this case, you would submit the data with a `start_date` of
|
104
|
+
`2012-06-01` and an `end_date` of `2013-08-31`; and a
|
105
|
+
`start_date_type` of `=` and an `end_date_type` of `=`. You would
|
106
|
+
also submit a `sample_date` for that document, which is the date on
|
107
|
+
which the license was known to be current (often today's date, but
|
108
|
+
sometimes the reporting date given in the source).
|
109
|
+
|
110
|
+
However, you find that Brobdingnagian licenses only tell you currently
|
111
|
+
issued licenses. As a bot writer, all you can say of a particular
|
112
|
+
license is "I saw this license when we ran the bot on 15 January
|
113
|
+
2012". In this case, you would leave `start_date` and `end_date`
|
114
|
+
blank, and submit a `sample_date` of `2012-01-15` instead.
|
115
|
+
|
116
|
+
If you subsequently see the license on 15 February, you'd submit
|
117
|
+
exactly the same data with a new `sample_date`.
|
118
|
+
|
119
|
+
A bot is expected to be run periodically, at intervals relevant to its
|
120
|
+
source. For example, a bot that scrapes data which changes monthly should
|
121
|
+
scrape at least monthly. You should indicate this in the bot's
|
122
|
+
`config.yml` file.
|
123
|
+
|
124
|
+
This means OpenCorporates can infer, based on the running schedule of
|
125
|
+
the bot, and the `sample_date`s of its data, the dates between which a
|
126
|
+
license was valid (in this case, between 15 January and 15 February).
|
127
|
+
|
128
|
+
Hence the above.
|
129
|
+
|
130
|
+
# Speeding up your tests
|
131
|
+
|
132
|
+
When writing scrapers, it's common to find yourself repeatedly
|
133
|
+
scraping data from a source as you iteratively improve your code. It
|
134
|
+
can be useful to use a caching proxy on your development machine to
|
135
|
+
speed up this cycle.
|
136
|
+
|
137
|
+
If you run `bundle exec openc_bot rake bot:run -- --test`, then your
|
138
|
+
`fetch_all_records` method will receive an option `test_mode`; you can use
|
139
|
+
this to turn proxying on or off. Here's how you can set a proxy using
|
140
|
+
the `mechanize` library; if you want to use different http client
|
141
|
+
libraries, refer to their documentation regarding how to set a proxy.
|
142
|
+
|
143
|
+
agent = Mechanize.new
|
144
|
+
if opts[:test_mode]
|
145
|
+
# this requires you to have a working proxy set up -- see
|
146
|
+
# README.md for notes. It can speed up development considerably.
|
147
|
+
agent.set_proxy 'localhost', 8123
|
148
|
+
end
|
149
|
+
agent.get("http://www.foo.com") # will get it from local cache the second time
|
150
|
+
|
151
|
+
To make this work, you will also want to set up a caching proxy
|
152
|
+
listening on `localhost:8123`. One such lightweight proxy is
|
153
|
+
[polipo](http://www.pps.univ-paris-diderot.fr/~jch/software/polipo/),
|
154
|
+
which is available packaged for various platforms. The following
|
155
|
+
options in the config work for us:
|
156
|
+
|
157
|
+
cacheIsShared = false
|
158
|
+
disableIndexing = false
|
159
|
+
disableServersList = false
|
160
|
+
relaxTransparency = yes
|
161
|
+
dontTrustVaryETag = yes
|
162
|
+
proxyOffline = no
|
163
|
+
|
164
|
+
# Targeting specific records
|
165
|
+
|
166
|
+
If you define an (optional) `fetch_specific_records` method in your
|
167
|
+
bot, then you can specify particular records you wish to be
|
168
|
+
fetched, thus:
|
169
|
+
|
170
|
+
bundle exec openc_bot rake bot:run -- --identifier "Foo Corp"
|
171
|
+
|
172
|
+
You can also target specific records to export with:
|
173
|
+
|
174
|
+
bundle exec openc_bot rake bot:export -- --identifier "Foo Corp"
|
175
|
+
|
176
|
+
# Incremental, resumable searches
|
177
|
+
|
178
|
+
It's often necessary to do incremental searches or scrapes to get a
|
179
|
+
full set of data. For example, you may know that all the records exist
|
180
|
+
at urls like http://foo.com/?page=1, http://foo.com/?page=2, etc.
|
181
|
+
|
182
|
+
Another common use case is where you can only access records with a
|
183
|
+
search. In these cases, there's no alternative except to search for
|
184
|
+
all the possible permutations of the letters A-Z and numbers 0-9 (in
|
185
|
+
the case of ASCII-searchable databases).
|
186
|
+
|
187
|
+
In the latter case, this is 46656 different possible
|
188
|
+
permutations. This will take a long time to scrape. If for some reason
|
189
|
+
the scraper gets interrupted, you don't want to have to start again.
|
190
|
+
|
191
|
+
We provide some convenience iterators, which save their current state,
|
192
|
+
and restart unless told otherwise. They are probably not worth using for
|
193
|
+
small scrapes (e.g. ones that take 10 mins) as they add to the complexity
|
194
|
+
of your code; however, they are invaluable for large scrapes that may well
|
195
|
+
get interrupted.
|
196
|
+
|
197
|
+
# currently provides a NumericIncrementer and an AsciiIncrementer:
|
198
|
+
require 'openc_bot/incrementers'
|
199
|
+
|
200
|
+
def fetch_all_records(opts={})
|
201
|
+
counter = NumericIncrementer.new(
|
202
|
+
:my_incrementer,
|
203
|
+
opts.merge(
|
204
|
+
:start_val => 0,
|
205
|
+
:end_val => 20))
|
206
|
+
|
207
|
+
# yield records one at a time, resuming by default
|
208
|
+
counter.resumable.each do |num|
|
209
|
+
url = "http://assets.opencorporates.com/test_bot_page_#{num}.html"
|
210
|
+
yield record_from_url(url)
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
214
|
+
The above code would resume an incremental search automatically. To
|
215
|
+
reset, run the bot thus:
|
216
|
+
|
217
|
+
bundle exec openc_bot rake bot:run -- --reset
|
218
|
+
|
219
|
+
When debugging, it is useful to test out only a few iterations at a time. To do this:
|
220
|
+
|
221
|
+
bundle exec openc_bot rake bot:run -- --max-iterations=3
|
222
|
+
|
223
|
+
This will restrict all iterators to a maximum of three iterations.
|
224
|
+
|
225
|
+
There's also an incrementer which you can manually fill with records
|
226
|
+
(arbitrary hashes), thus:
|
227
|
+
|
228
|
+
incrementer = OpencBot::ManualIncrementer.new(
|
229
|
+
:my_incrementer,
|
230
|
+
opts.merge(:fields => [:num]))
|
231
|
+
|
232
|
+
(0..10).each do |num|
|
233
|
+
incrementer.add_row({'num' => num})
|
234
|
+
end
|
235
|
+
|
236
|
+
# now increment over its values, resuming where we left off last time if interrupted
|
237
|
+
incrementer.resumable.each do |item|
|
238
|
+
doc = agent.get("http://assets.opencorporates.com/document_number#{item["num"]}")
|
239
|
+
end
|
240
|
+
|
241
|
+
ManualIncrementers also have a persisted field named `populated`,
|
242
|
+
which you can use to skip expensive record-filling if it's already
|
243
|
+
been done:
|
244
|
+
|
245
|
+
if !incrementer.populated
|
246
|
+
(0..10).each do |num|
|
247
|
+
incrementer.add_row({'num' => num})
|
248
|
+
end
|
249
|
+
end
|
250
|
+
incrementer.populated = true
|
251
|
+
|
252
|
+
There are examples of how this can work in
|
253
|
+
`examples/bot_with_simple_iterator`.
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
# load 'lib/tasks/openc_bot.rake'
|
3
|
+
# require 'lib/tasks'
|
4
|
+
require 'openc_bot/tasks'
|
5
|
+
|
6
|
+
|
7
|
+
$LOAD_PATH.unshift File.dirname(__FILE__) + '/../../lib'
|
8
|
+
# require 'resque/tasks'
|
9
|
+
|
10
|
+
Dir.glob('lib/tasks/*.rake').each { |r| import r }
|
11
|
+
|
12
|
+
require 'rspec/core/rake_task'
|
13
|
+
task :default => :spec
|
14
|
+
RSpec::Core::RakeTask.new
|
data/bin/openc_bot
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
gem_dir = File.expand_path("..",File.dirname(__FILE__))
|
2
|
+
$LOAD_PATH.unshift gem_dir# Look in gem directory for resources first.
|
3
|
+
exec_type = ARGV[0]
|
4
|
+
if exec_type == 'rake' then
|
5
|
+
require 'rake'
|
6
|
+
require 'pp'
|
7
|
+
pwd=Dir.pwd
|
8
|
+
Dir.chdir(gem_dir) # We'll load rakefile from the gem's dir.
|
9
|
+
Rake.application.init
|
10
|
+
Rake.application.load_rakefile
|
11
|
+
Dir.chdir(pwd) # Revert to original pwd for any path args passed to task.
|
12
|
+
Rake.application.invoke_task(ARGV[1])
|
13
|
+
end
|
data/create_bot.sh
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
set -e
|
4
|
+
|
5
|
+
# Add the openc_bot to the Gemfile:
|
6
|
+
if [ ! -f Gemfile ]; then
|
7
|
+
echo "source 'https://rubygems.org'" >> Gemfile
|
8
|
+
echo "gem 'openc_bot', :git => 'https://github.com/openc/openc_bot.git'" >> Gemfile
|
9
|
+
fi
|
10
|
+
|
11
|
+
echo "/db/*" >> .gitignore
|
12
|
+
echo "/data/*" >> .gitignore
|
13
|
+
echo "/tmp/*" >> .gitignore
|
14
|
+
echo "/pids/*" >> .gitignore
|
15
|
+
echo "!.gitkeep" >> .gitignore
|
16
|
+
|
17
|
+
mkdir -p db
|
18
|
+
mkdir -p data
|
19
|
+
mkdir -p tmp
|
20
|
+
mkdir -p pids
|
21
|
+
|
22
|
+
touch db/.gitkeep
|
23
|
+
touch data/.gitkeep
|
24
|
+
touch tmp/.gitkeep
|
25
|
+
touch pids/.gitkeep
|
26
|
+
|
27
|
+
bundle install
|
28
|
+
# create the bot
|
29
|
+
bundle exec openc_bot rake bot:create
|
30
|
+
bundle install
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
set -e
|
4
|
+
|
5
|
+
# Add the openc_bot to the Gemfile:
|
6
|
+
if [ ! -f Gemfile ]; then
|
7
|
+
echo "source 'https://rubygems.org'" >> Gemfile
|
8
|
+
echo "gem 'openc_bot', :git => 'https://github.com/openc/openc_bot.git', :branch => 'company_fetcher_bot'" >> Gemfile
|
9
|
+
fi
|
10
|
+
echo "/db" >> .gitignore
|
11
|
+
echo "/data" >> .gitignore
|
12
|
+
echo "/tmp" >> .gitignore
|
13
|
+
bundle install
|
14
|
+
# create the bot
|
15
|
+
bundle exec openc_bot rake bot:create_company_bot
|
16
|
+
bundle install
|
@@ -0,0 +1,31 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
set -e
|
4
|
+
|
5
|
+
# Add the openc_bot to the Gemfile:
|
6
|
+
if [ ! -f Gemfile ]; then
|
7
|
+
echo "source 'https://rubygems.org'" >> Gemfile
|
8
|
+
echo "gem 'openc_bot', :git => 'https://github.com/openc/openc_bot.git'" >> Gemfile
|
9
|
+
echo "gem 'mechanize'" >> Gemfile
|
10
|
+
fi
|
11
|
+
|
12
|
+
echo "/db/*" >> .gitignore
|
13
|
+
echo "/data/*" >> .gitignore
|
14
|
+
echo "/tmp/*" >> .gitignore
|
15
|
+
echo "/pids/*" >> .gitignore
|
16
|
+
echo "!.gitkeep" >> .gitignore
|
17
|
+
|
18
|
+
mkdir -p db
|
19
|
+
mkdir -p data
|
20
|
+
mkdir -p tmp
|
21
|
+
mkdir -p pids
|
22
|
+
|
23
|
+
touch db/.gitkeep
|
24
|
+
touch data/.gitkeep
|
25
|
+
touch tmp/.gitkeep
|
26
|
+
touch pids/.gitkeep
|
27
|
+
|
28
|
+
bundle install
|
29
|
+
# create the bot
|
30
|
+
bundle exec openc_bot rake bot:create_simple_bot
|
31
|
+
bundle install
|