couchpopulator 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +41 -9
- data/executors/standard.rb +65 -54
- data/generators/example.rb +13 -11
- data/lib/couchpopulator/base.rb +13 -14
- data/lib/couchpopulator/couch_helper.rb +4 -2
- data/lib/couchpopulator/initializer.rb +26 -13
- data/lib/couchpopulator/misc_helper.rb +2 -0
- data/lib/couchpopulator.rb +0 -1
- data/lib/curl_adapter.rb +1 -5
- data/lib/logger.rb +1 -1
- metadata +4 -4
data/README.md
CHANGED
@@ -15,7 +15,7 @@ This project is in a very early state. I'm sure it has some serious bugs and it'
|
|
15
15
|
|
16
16
|
# Getting Started
|
17
17
|
|
18
|
-
##
|
18
|
+
## Installation
|
19
19
|
|
20
20
|
sudo gem install couchpopulator
|
21
21
|
|
@@ -27,18 +27,25 @@ This project is in a very early state. I'm sure it has some serious bugs and it'
|
|
27
27
|
rake build
|
28
28
|
|
29
29
|
## Getting help
|
30
|
+
CouchPopulator tries to give good help on command line options by using:
|
30
31
|
|
31
32
|
couchpopulator --help
|
33
|
+
|
34
|
+
To get the command line options for a specific execution engine, simply use:
|
35
|
+
|
36
|
+
couchpopulator [EXECUTOR] --help
|
32
37
|
|
33
38
|
## Custom Generators
|
34
39
|
Custom generators only need to implement one method. Have a look:
|
35
40
|
|
36
|
-
module
|
37
|
-
|
38
|
-
class
|
39
|
-
|
40
|
-
|
41
|
-
|
41
|
+
module CouchPopulator
|
42
|
+
module Generators
|
43
|
+
class Example
|
44
|
+
class << self
|
45
|
+
def generate(count)
|
46
|
+
# ...heavy generating action goes here...
|
47
|
+
# return array of hashes (documents)
|
48
|
+
end
|
42
49
|
end
|
43
50
|
end
|
44
51
|
end
|
@@ -48,14 +55,39 @@ generate(count) should return an array of documents. Each document should be an
|
|
48
55
|
|
49
56
|
|
50
57
|
## Custom Execution Engines
|
51
|
-
Custom execute engines need to implement two methods `
|
58
|
+
Custom execution engines need to implement two methods: `command_line_options` and `execute`. See `executors/standard.rb` for an example.
|
59
|
+
|
60
|
+
# Using the CouchPopulator API
|
61
|
+
This is the very first version of the CouchPopulator API (introduced in v0.2.0). The interface is still very ugly and will change significantly in the future.
|
62
|
+
|
63
|
+
The options for `CouchPopulator::Base`'s initializer are the same as the command line options. No rules without exceptions:
|
64
|
+
|
65
|
+
* `:logger` needs to be set to an instance of `CouchPopulator::Logger`
|
66
|
+
* `:generator_klass` needs to be set to the generator constant; `CouchPopulator::Initializer.generator()` tries to load it
|
67
|
+
* `:executor_klass` needs to be set to the executor constant; `CouchPopulator::Initializer.executor()` tries to load it
|
68
|
+
|
69
|
+
Example:
|
70
|
+
|
71
|
+
require 'rubygems'
|
72
|
+
require 'couchpopulator'
|
73
|
+
|
74
|
+
options = { :logger => CouchPopulator::Logger.new,
|
75
|
+
:generator_klass => CouchPopulator::Initializer.generator('example'),
|
76
|
+
:executor_klass => CouchPopulator::Initializer.executor('standard'),
|
77
|
+
:couch => 'http://localhost:5984/test',
|
78
|
+
:docs_per_chunk => 1,
|
79
|
+
:rounds => 1,
|
80
|
+
:concurrent_inserts => 1 }
|
52
81
|
|
82
|
+
CouchPopulator::Base.new(options).populate
|
53
83
|
|
54
84
|
# TODO
|
85
|
+
- make the API suck less
|
86
|
+
- Add support for using a configuration YAML
|
55
87
|
- Find out the best strategies for inserting docs to CouchDB and provide execution engines for different approaches
|
56
88
|
- Implement some more features, like dumping-options for generated documents or load dumped JSON docs to CouchDB
|
57
89
|
- Think about a test suite and implement it
|
58
|
-
- hunting bugs, make it cleaner
|
90
|
+
- hunting bugs, make it cleaner
|
59
91
|
|
60
92
|
|
61
93
|
|
data/executors/standard.rb
CHANGED
@@ -1,72 +1,83 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
module CouchPopulator
|
2
|
+
module Executors
|
3
|
+
class Standard
|
4
|
+
def initialize(opts={})
|
5
|
+
@options = self.class.defaults.merge(opts)
|
6
|
+
end
|
7
|
+
|
8
|
+
def self.defaults
|
9
|
+
@defaults ||= {
|
10
|
+
:docs_per_chunk => 1000,
|
11
|
+
:concurrent_inserts => 5,
|
12
|
+
:rounds => 1
|
13
|
+
}
|
14
|
+
end
|
6
15
|
|
7
|
-
|
8
|
-
|
16
|
+
def self.command_line_options
|
17
|
+
help = StringIO.new
|
9
18
|
|
10
|
-
|
11
|
-
version "StandardExecutor v0.1 (c) Sebastian Cohnen, 2009"
|
12
|
-
banner <<-BANNER
|
13
|
-
This is the StandardExecutor
|
14
|
-
BANNER
|
15
|
-
opt :docs_per_chunk, "Number of docs per chunk", :default => 2000
|
16
|
-
opt :concurrent_inserts, "Number of concurrent inserts", :default => 5
|
17
|
-
opt :rounds, "Number of rounds", :default => 2
|
18
|
-
opt :preflight, "Generate the docs, but don't write to couch. Use with ", :default => false
|
19
|
-
opt :help, "Show this message"
|
19
|
+
defaults = self.defaults
|
20
20
|
|
21
|
-
|
22
|
-
|
21
|
+
opts = Trollop.options do
|
22
|
+
version "StandardExecutor v0.1 (c) Sebastian Cohnen, 2009"
|
23
|
+
banner <<-BANNER
|
24
|
+
This is the StandardExecutor
|
25
|
+
BANNER
|
26
|
+
opt :docs_per_chunk, "Number of docs per chunk", :default => defaults[:docs_per_chunk]
|
27
|
+
opt :concurrent_inserts, "Number of concurrent inserts", :default => defaults[:concurrent_inserts]
|
28
|
+
opt :rounds, "Number of rounds", :default => defaults[:rounds]
|
29
|
+
opt :help, "Show this message"
|
23
30
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
31
|
+
educate(help)
|
32
|
+
end
|
33
|
+
|
34
|
+
if opts[:help]
|
35
|
+
puts help.rewind.read
|
36
|
+
exit
|
37
|
+
else
|
38
|
+
return opts
|
39
|
+
end
|
29
40
|
end
|
30
|
-
end
|
31
41
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
42
|
+
def execute
|
43
|
+
rounds = @options[:rounds]
|
44
|
+
docs_per_chunk = @options[:docs_per_chunk]
|
45
|
+
concurrent_inserts = @options[:concurrent_inserts]
|
46
|
+
generator = @options[:generator_klass]
|
37
47
|
|
38
|
-
|
39
|
-
|
40
|
-
|
48
|
+
log = @options[:logger]
|
49
|
+
log << "CouchPopulator's default execution engine has been started."
|
50
|
+
log << "Using #{generator.to_s} for generating the documents."
|
41
51
|
|
42
|
-
|
43
|
-
|
44
|
-
|
52
|
+
total_docs = docs_per_chunk * concurrent_inserts * rounds
|
53
|
+
log << "Going to insert #{total_docs} generated docs into #{@options[:couch_url]}"
|
54
|
+
log << "Using #{rounds} rounds of #{concurrent_inserts} concurrent inserts with #{docs_per_chunk} docs each"
|
45
55
|
|
46
|
-
|
56
|
+
start_time = Time.now
|
47
57
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
58
|
+
rounds.times do |round|
|
59
|
+
log << "Starting with round #{round + 1}"
|
60
|
+
concurrent_inserts.times do
|
61
|
+
fork do
|
62
|
+
# generate payload for bulk_doc
|
63
|
+
payload = JSON.generate({"docs" => generator.generate(docs_per_chunk)})
|
54
64
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
65
|
+
unless @options[:generate_only]
|
66
|
+
result = CurlAdapter::Invoker.new(@options[:couch_url]).post(payload)
|
67
|
+
else
|
68
|
+
log << "Generated chunk..."
|
69
|
+
puts payload
|
70
|
+
end
|
60
71
|
end
|
61
72
|
end
|
73
|
+
concurrent_inserts.times { Process.wait() }
|
62
74
|
end
|
63
|
-
|
75
|
+
|
76
|
+
end_time = Time.now
|
77
|
+
duration = end_time - start_time
|
78
|
+
|
79
|
+
log << "Execution time: #{duration}s, #{@options[:generate_only] ? "generated" : "inserted"} #{total_docs}"
|
64
80
|
end
|
65
|
-
|
66
|
-
end_time = Time.now
|
67
|
-
duration = end_time - start_time
|
68
|
-
|
69
|
-
log << "Execution time: #{duration}s, inserted #{total_docs}"
|
70
81
|
end
|
71
82
|
end
|
72
83
|
end
|
data/generators/example.rb
CHANGED
@@ -1,15 +1,17 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
class
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
1
|
+
module CouchPopulator
|
2
|
+
module Generators
|
3
|
+
class Example
|
4
|
+
class << self
|
5
|
+
def generate(count)
|
6
|
+
docs = []
|
7
|
+
count.times do
|
8
|
+
docs << {
|
9
|
+
"title" => "Example",
|
10
|
+
"created_at" => Time.now - (rand(7) * 60*60*24)
|
11
|
+
}
|
12
|
+
end
|
13
|
+
docs
|
11
14
|
end
|
12
|
-
docs
|
13
15
|
end
|
14
16
|
end
|
15
17
|
end
|
data/lib/couchpopulator/base.rb
CHANGED
@@ -1,24 +1,23 @@
|
|
1
1
|
module CouchPopulator
|
2
2
|
class Base
|
3
|
-
def initialize(options)
|
4
|
-
@
|
5
|
-
|
6
|
-
@
|
3
|
+
def initialize(options, called_from_command_line = false)
|
4
|
+
@options = options
|
5
|
+
|
6
|
+
@options[:couch_url] = CouchHelper.get_full_couchurl options[:couch] unless @options[:couch].nil?
|
7
|
+
@options.merge!(@options[:executor_klass].command_line_options) if called_from_command_line
|
7
8
|
end
|
8
9
|
|
9
10
|
def populate
|
10
|
-
@
|
11
|
-
|
12
|
-
@opts[:database] ||= database
|
13
|
-
@opts[:executor_klass].new(@opts).execute
|
11
|
+
@options[:executor_klass].new(@options).execute
|
14
12
|
end
|
13
|
+
end
|
15
14
|
|
16
|
-
|
17
|
-
|
18
|
-
end
|
15
|
+
class CouchPopulatorError < StandardError
|
16
|
+
end
|
19
17
|
|
20
|
-
|
21
|
-
|
22
|
-
|
18
|
+
class GeneratorNotFound < CouchPopulatorError
|
19
|
+
end
|
20
|
+
|
21
|
+
class ExecutorNotFound < CouchPopulatorError
|
23
22
|
end
|
24
23
|
end
|
@@ -1,12 +1,14 @@
|
|
1
1
|
module CouchPopulator
|
2
|
-
# Borrowed from Rails
|
3
|
-
# http://github.com/rails/rails/blob/ea0e41d8fa5a132a2d2771e9785833b7663203ac/activesupport/lib/active_support/inflector.rb#L355
|
4
2
|
class CouchHelper
|
5
3
|
class << self
|
6
4
|
def get_full_couchurl(arg)
|
7
5
|
arg.match(/^https?:\/\//) ? arg : URI.join('http://127.0.0.1:5984/', arg).to_s
|
8
6
|
end
|
9
7
|
|
8
|
+
def get_database_from_couchurl(url)
|
9
|
+
URI.parse(url).path
|
10
|
+
end
|
11
|
+
|
10
12
|
def couch_available? (couch_url)
|
11
13
|
# TODO this uri-thing is ugly :/
|
12
14
|
tmp = URI.parse(couch_url)
|
@@ -25,9 +25,23 @@ module CouchPopulator
|
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
|
+
# Get the generator_klass and executor_klass
|
29
|
+
generator_klass = begin
|
30
|
+
generator(command_line_options[:generator])
|
31
|
+
rescue CouchPopulator::GeneratorNotFound
|
32
|
+
Trollop.die :generator, "Generator must be set, a valid class-name and respond to generate(n)"
|
33
|
+
end
|
34
|
+
|
35
|
+
executor_klass = begin
|
36
|
+
executor(executor(ARGV.shift || "standard"))
|
37
|
+
rescue CouchPopulator::ExecutorNotFound
|
38
|
+
Trollop.die "Executor must be set and a valid class-name"
|
39
|
+
end
|
40
|
+
|
41
|
+
|
28
42
|
# Initialize CouchPopulator
|
29
|
-
options = ({:executor_klass =>
|
30
|
-
CouchPopulator::Base.new(options).populate
|
43
|
+
options = ({:executor_klass => executor_klass, :generator_klass => generator_klass, :logger => CouchPopulator::Logger.new(command_line_options[:logfile])}).merge(command_line_options)
|
44
|
+
CouchPopulator::Base.new(options, true).populate
|
31
45
|
end
|
32
46
|
|
33
47
|
# Define some command-line options
|
@@ -57,35 +71,34 @@ OPTIONS:
|
|
57
71
|
end
|
58
72
|
end
|
59
73
|
|
60
|
-
# Get the requested generator or die
|
61
|
-
def generator
|
74
|
+
# Get the requested generator constant or die
|
75
|
+
def generator(generator)
|
62
76
|
retried = false
|
63
77
|
@generator ||= begin
|
64
|
-
generator_klass = CouchPopulator::MiscHelper.camelize_and_constantize("generators/#{
|
78
|
+
generator_klass = CouchPopulator::MiscHelper.camelize_and_constantize("couch_populator/generators/#{generator}")
|
65
79
|
rescue NameError
|
66
80
|
begin
|
67
|
-
require File.join(File.dirname(__FILE__), "../../generators/#{
|
81
|
+
require File.join(File.dirname(__FILE__), "../../generators/#{generator}.rb")
|
68
82
|
rescue LoadError; end # just catch, do nothing
|
69
83
|
retry if (retried = !retried)
|
70
84
|
ensure
|
71
|
-
|
85
|
+
raise CouchPopulator::GeneratorNotFound if generator_klass.nil?
|
72
86
|
generator_klass
|
73
87
|
end
|
74
88
|
end
|
75
89
|
|
76
|
-
# Get the exexcutor (defaults to standard) or die
|
77
|
-
def executor
|
90
|
+
# Get the executor constant (defaults to standard) or die
|
91
|
+
def executor(executor)
|
78
92
|
retried = false
|
79
93
|
@executor ||= begin
|
80
|
-
|
81
|
-
executor_klass = CouchPopulator::MiscHelper.camelize_and_constantize("executors/#{executor_cmd}")
|
94
|
+
executor_klass = CouchPopulator::MiscHelper.camelize_and_constantize("couch_populator/executors/#{executor}")
|
82
95
|
rescue NameError
|
83
96
|
begin
|
84
|
-
require File.join(File.dirname(__FILE__), "../../executors/#{
|
97
|
+
require File.join(File.dirname(__FILE__), "../../executors/#{executor}.rb")
|
85
98
|
rescue NameError, LoadError; end # just catch, do nothing
|
86
99
|
retry if (retried = !retried)
|
87
100
|
ensure
|
88
|
-
|
101
|
+
raise CouchPopulator::ExecutorNotFound if executor_klass.nil?
|
89
102
|
executor_klass
|
90
103
|
end
|
91
104
|
end
|
@@ -6,6 +6,8 @@ module CouchPopulator
|
|
6
6
|
constantize(camelize(lower_case_and_underscored_word))
|
7
7
|
end
|
8
8
|
|
9
|
+
# Borrowed from Rails
|
10
|
+
# http://github.com/rails/rails/blob/ea0e41d8fa5a132a2d2771e9785833b7663203ac/activesupport/lib/active_support/inflector.rb#L355
|
9
11
|
def camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
|
10
12
|
if first_letter_in_uppercase
|
11
13
|
lower_case_and_underscored_word.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
|
data/lib/couchpopulator.rb
CHANGED
@@ -5,7 +5,6 @@ require 'uri'
|
|
5
5
|
require 'json/add/rails'
|
6
6
|
require 'json/add/core'
|
7
7
|
|
8
|
-
require File.join(File.dirname(__FILE__), 'couchpopulator.rb')
|
9
8
|
require File.join(File.dirname(__FILE__), 'curl_adapter.rb')
|
10
9
|
require File.join(File.dirname(__FILE__), 'generator.rb')
|
11
10
|
require File.join(File.dirname(__FILE__), 'logger.rb')
|
data/lib/curl_adapter.rb
CHANGED
data/lib/logger.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: couchpopulator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sebastian Cohnen
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-11-
|
12
|
+
date: 2009-11-17 00:00:00 +01:00
|
13
13
|
default_executable: couchpopulator
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: "1.15"
|
34
34
|
version:
|
35
|
-
description:
|
35
|
+
description: The idea behind this tool is to provide a framework for populating your CouchDB instances with generated documents. It provides a pluggable system for easily writing your own generators. Also the processes which invoke the generator and manage the insertion to CouchDB, which I call execution engines, are easily exchangeable. The default execution engine uses CouchDB's bulk-docs-API with configurable chunk-size, concurrent inserts and total chunks to insert.
|
36
36
|
email: sebastian.cohnen@gmx.net
|
37
37
|
executables:
|
38
38
|
- couchpopulator
|
@@ -86,6 +86,6 @@ rubyforge_project:
|
|
86
86
|
rubygems_version: 1.3.5
|
87
87
|
signing_key:
|
88
88
|
specification_version: 3
|
89
|
-
summary:
|
89
|
+
summary: Flexible tool for populating CouchDB with generated documents
|
90
90
|
test_files: []
|
91
91
|
|