topfunky-couchrest 0.9.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data/LICENSE +176 -0
  2. data/README.rdoc +51 -0
  3. data/Rakefile +87 -0
  4. data/THANKS +15 -0
  5. data/bin/couchdir +20 -0
  6. data/bin/couchview +48 -0
  7. data/examples/word_count/markov +38 -0
  8. data/examples/word_count/views/books/chunked-map.js +3 -0
  9. data/examples/word_count/views/books/united-map.js +1 -0
  10. data/examples/word_count/views/markov/chain-map.js +6 -0
  11. data/examples/word_count/views/markov/chain-reduce.js +7 -0
  12. data/examples/word_count/views/word_count/count-map.js +6 -0
  13. data/examples/word_count/views/word_count/count-reduce.js +3 -0
  14. data/examples/word_count/word_count.rb +67 -0
  15. data/examples/word_count/word_count_query.rb +39 -0
  16. data/lib/couchrest.rb +92 -0
  17. data/lib/couchrest/commands/generate.rb +71 -0
  18. data/lib/couchrest/commands/push.rb +99 -0
  19. data/lib/couchrest/core/database.rb +105 -0
  20. data/lib/couchrest/core/server.rb +49 -0
  21. data/lib/couchrest/helper/file_manager.rb +223 -0
  22. data/lib/couchrest/helper/pager.rb +103 -0
  23. data/lib/couchrest/helper/streamer.rb +29 -0
  24. data/lib/couchrest/monkeypatches.rb +22 -0
  25. data/spec/couchrest_spec.rb +92 -0
  26. data/spec/database_spec.rb +429 -0
  27. data/spec/file_manager_spec.rb +116 -0
  28. data/spec/fixtures/attachments/test.html +11 -0
  29. data/spec/fixtures/views/lib.js +3 -0
  30. data/spec/fixtures/views/test_view/lib.js +3 -0
  31. data/spec/fixtures/views/test_view/only-map.js +4 -0
  32. data/spec/fixtures/views/test_view/test-map.js +3 -0
  33. data/spec/fixtures/views/test_view/test-reduce.js +3 -0
  34. data/spec/pager_spec.rb +122 -0
  35. data/spec/spec.opts +6 -0
  36. data/spec/spec_helper.rb +4 -0
  37. data/spec/streamer_spec.rb +23 -0
  38. data/utils/remap.rb +27 -0
  39. data/utils/subset.rb +30 -0
  40. metadata +124 -0
@@ -0,0 +1 @@
1
+ function(doc){if(doc.text && doc.text.match(/united/)) emit([doc.title, doc.chunk],null)}
@@ -0,0 +1,6 @@
1
+ function(doc){
2
+ var words = doc.text.split(/\W/).filter(function(w) {return w.length > 0}).map(function(w){return w.toLowerCase()});
3
+ for (var i = 0, l = words.length; i < l; i++) {
4
+ emit(words.slice(i,4),doc.title);
5
+ }
6
+ }
@@ -0,0 +1,7 @@
1
+ function(key,vs,c){
2
+ if (c) {
3
+ return sum(vs);
4
+ } else {
5
+ return vs.length;
6
+ }
7
+ }
@@ -0,0 +1,6 @@
1
+ function(doc){
2
+ var words = doc.text.split(/\W/).map(function(w){return w.toLowerCase()});
3
+ words.forEach(function(word){
4
+ if (word.length > 0) emit([word,doc.title],1);
5
+ });
6
+ }
@@ -0,0 +1,3 @@
1
+ function(key,combine){
2
+ return sum(combine);
3
+ }
@@ -0,0 +1,67 @@
1
# Word-count example, step 1: download a few Project Gutenberg books next to
# this script and store them in the 'word-count-example' CouchDB database as
# documents of up to 11 lines each ({:title, :chunk, :text}).

# The library lives in lib/ at the root of the gem.
require File.expand_path('../../lib/couchrest', File.dirname(__FILE__))

# Number of text lines stored per document.
LINES_PER_CHUNK = 11

couch = CouchRest.new("http://localhost:5984")
db = couch.database('word-count-example')
# Best effort: start from a clean database; deleting fails if it does not exist yet.
db.delete! rescue nil
db = couch.create_db('word-count-example')

books = {
  'outline-of-science.txt' => 'http://www.gutenberg.org/files/20417/20417.txt',
  'ulysses.txt' => 'http://www.gutenberg.org/dirs/etext03/ulyss12.txt',
  'america.txt' => 'http://www.gutenberg.org/files/16960/16960.txt',
  'da-vinci.txt' => 'http://www.gutenberg.org/dirs/etext04/7ldv110.txt'
}

# Download each book once; files already on disk are reused.
books.each do |file, url|
  pathfile = File.join(File.dirname(__FILE__), file)
  next if File.exist?(pathfile)
  # Argument-list form of system: no shell is involved, so neither the URL nor
  # the path is subject to word splitting or shell interpretation.
  system('curl', '-o', pathfile, url)
end


books.keys.each do |book|
  title = book.split('.')[0]
  puts title
  File.open(File.join(File.dirname(__FILE__), book), 'r') do |file|
    chunk = 0
    # each_slice also yields the final, shorter group, so the tail of the book
    # is stored instead of being silently dropped.
    file.each_slice(LINES_PER_CHUNK) do |lines|
      db.save({
        :title => title,
        :chunk => chunk,
        :text => lines.join('')
      })
      chunk += 1
      puts chunk
    end
  end
end

# word_count = {
#   :map => 'function(doc){
#     var words = doc.text.split(/\W/);
#     words.forEach(function(word){
#       if (word.length > 0) emit([word,doc.title],1);
#     });
#   }',
#   :reduce => 'function(key,combine){
#     return sum(combine);
#   }'
# }
#
# db.delete db.get("_design/word_count") rescue nil
#
# db.save({
#   "_id" => "_design/word_count",
#   :views => {
#     :count => word_count,
#     :words => {:map => word_count[:map]}
#   }
# })

# puts "The books have been stored in your CouchDB. To initiate the MapReduce process, visit http://localhost:5984/_utils/ in your browser and click 'word-count-example', then select view 'words' or 'count'. The process could take about 15 minutes on an average MacBook."
#
67
+
@@ -0,0 +1,39 @@
1
# Word-count example, step 2: run a few queries against the 'word_count/count'
# view of the 'word-count-example' database and print the raw results.

# The library lives in lib/ at the root of the gem.
require File.expand_path('../../lib/couchrest', File.dirname(__FILE__))

couch = CouchRest.new("http://localhost:5984")
db = couch.database('word-count-example')

puts "Now that we've parsed all those books into CouchDB, the queries we can run are incredibly flexible."
puts "\nThe simplest query we can run is the total word count for all words in all documents:"

puts db.view('word_count/count').inspect

# The loader script stores four books, so the message says four.
puts "\nWe can also narrow the query down to just one word, across all documents. Here is the count for 'flight' in all four books:"

word = 'flight'
# View keys are [word, title]; this range covers every title for one word.
params = {
  :startkey => [word],
  :endkey => [word,'Z']
}

puts db.view('word_count/count',params).inspect

puts "\nWe scope the query using startkey and endkey params to take advantage of CouchDB's collation ordering. Here are the params for the last query:"
puts params.inspect

puts "\nWe can also count words on a per-title basis."

title = 'da-vinci'
params = {
  :key => [word, title]
}

puts db.view('word_count/count',params).inspect


puts "\nHere are the params for 'flight' in the da-vinci book:"
puts params.inspect
puts
puts 'The url looks like this:'
puts 'http://localhost:5984/word-count-example/_view/word_count/count?key=["flight","da-vinci"]'
puts "\nTry dropping that in your browser..."
data/lib/couchrest.rb ADDED
@@ -0,0 +1,92 @@
1
+ # Copyright 2008 J. Chris Anderson
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ require "rubygems"
16
+ require 'json'
17
+ require 'rest_client'
18
+
19
+ $:.unshift File.dirname(__FILE__) unless
20
+ $:.include?(File.dirname(__FILE__)) ||
21
+ $:.include?(File.expand_path(File.dirname(__FILE__)))
22
+
23
+
24
+ require 'couchrest/monkeypatches'
25
+
26
module CouchRest
  autoload :Server,       'couchrest/core/server'
  autoload :Database,     'couchrest/core/database'
  autoload :Pager,        'couchrest/helper/pager'
  autoload :FileManager,  'couchrest/helper/file_manager'
  autoload :Streamer,     'couchrest/helper/streamer'

  # Query parameters whose values are sent JSON-encoded.
  JSON_PARAMS = %w[key startkey endkey].freeze

  # The CouchRest module methods handle the basic JSON serialization
  # and deserialization, as well as query parameters. The module also includes
  # some helpers for tasks like instantiating a new Database or Server instance.
  class << self

    # Builds a Server from the given arguments.
    # todo, make this parse the url and instantiate a Server or Database instance
    # depending on the specificity.
    def new(*opts)
      Server.new(*opts)
    end

    # Takes a full database URL (e.g. "http://localhost:5984/mydb"), builds a
    # Server for the host part and delegates to Server#database! with the
    # database name.
    # NOTE(review): Server#database! is not visible here; per the original
    # comments it ensures the database exists, creating it if necessary.
    def database!(url)
      host, name = split_database_url(url)
      CouchRest.new(host).database!(name)
    end

    # Same as database!, but delegates to Server#database.
    def database(url)
      host, name = split_database_url(url)
      CouchRest.new(host).database(name)
    end

    # PUTs +doc+ (serialized with to_json when given) to +uri+ and returns the
    # parsed JSON response.
    def put(uri, doc = nil)
      payload = doc.to_json if doc
      JSON.parse(RestClient.put(uri, payload))
    end

    # GETs +uri+ and returns the parsed JSON response. Nesting depth checking
    # is disabled for the parse.
    def get(uri)
      JSON.parse(RestClient.get(uri), :max_nesting => false)
    end

    # POSTs +doc+ (serialized with to_json when given) to +uri+ and returns the
    # parsed JSON response.
    def post(uri, doc = nil)
      payload = doc.to_json if doc
      JSON.parse(RestClient.post(uri, payload))
    end

    # Sends a DELETE to +uri+ and returns the parsed JSON response.
    def delete(uri)
      JSON.parse(RestClient.delete(uri))
    end

    # Appends +params+ to +url+ as a query string and returns the result.
    # Values of key/startkey/endkey are JSON-encoded first; names and values
    # are CGI-escaped. With nil or empty params the url is returned unchanged
    # (no dangling "?").
    def paramify_url(url, params = nil)
      return url if params.nil? || params.empty?

      query = params.map do |k, v|
        v = v.to_json if JSON_PARAMS.include?(k.to_s)
        "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
      end.join("&")
      "#{url}?#{query}"
    end

    private

    # Splits a database URL into [server_url, database_name]. The leading "/"
    # of the path is removed from the name, so that joining host and name with
    # "/" does not produce a double slash.
    def split_database_url(url)
      uri = URI.parse(url)
      name = uri.path.sub(%r{\A/+}, '')
      uri.path = ''
      [uri.to_s, name]
    end
  end # class << self
end
@@ -0,0 +1,71 @@
1
+ require 'fileutils'
2
+
3
module CouchRest
  module Commands
    # `couchview generate`: scaffolds a views directory containing a global
    # lib.js plus, for each design name, a subdirectory with a sample map
    # function, a sample reduce function and a design-specific lib.js.
    module Generate

      # Creates the directory layout and sample files.
      #
      # options[:directory]     - directory to create the files in
      # options[:trailing_args] - design document names (nil is treated as none)
      def self.run(options)
        directory = options[:directory]
        design_names = Array(options[:trailing_args])

        FileUtils.mkdir_p(directory)
        filename = File.join(directory, "lib.js")
        self.write(filename, <<-FUNC)
          // Put global functions here.
          // Include in your views with
          //
          // //include-lib
        FUNC

        design_names.each do |design_name|
          subdirectory = File.join(directory, design_name)
          FileUtils.mkdir_p(subdirectory)
          filename = File.join(subdirectory, "sample-map.js")
          self.write(filename, <<-FUNC)
            function(doc) {
              // Keys is first letter of _id
              emit(doc._id[0], doc);
            }
          FUNC

          filename = File.join(subdirectory, "sample-reduce.js")
          self.write(filename, <<-FUNC)
            function(keys, values) {
              // Count the number of keys starting with this letter
              return values.length;
            }
          FUNC

          filename = File.join(subdirectory, "lib.js")
          self.write(filename, <<-FUNC)
            // Put functions specific to '#{design_name}' here.
            // Include in your views with
            //
            // //include-lib
          FUNC
        end
      end

      # Returns the usage text for the generate command.
      def self.help
        unindent(<<-GEN)

          Usage: couchview generate directory design1 design2 design3 ...

          Couchview will create directories and example views for the design documents you specify.

        GEN
      end

      # Announces the file on stdout, then writes +contents+ to +filename+ with
      # the heredoc indentation removed. +contents+ itself is not modified.
      def self.write(filename, contents)
        puts "Writing #{filename}"
        File.open(filename, "w") { |f| f.write(unindent(contents)) }
      end

      # Removes the indentation shared by all non-blank lines of +text+, so the
      # result does not depend on how deeply the heredoc is nested in this file.
      def self.unindent(text)
        indent = text.scan(/^[ \t]*(?=\S)/).map { |lead| lead.length }.min || 0
        text.gsub(/^[ \t]{#{indent}}/, '')
      end
      private_class_method :unindent

    end
  end
end
@@ -0,0 +1,99 @@
1
module CouchRest

  module Commands

    # `couchview push`: pushes the views found in a directory to a database
    # through CouchRest::FileManager.
    module Push

      # options[:directory]     - directory holding the view files
      # options[:trailing_args] - its first element is the target database
      # options[:loud]          - passed on to FileManager#loud=
      def self.run(options)
        directory = options[:directory]
        database = options[:trailing_args].first

        fm = CouchRest::FileManager.new(database)
        fm.loud = options[:loud]
        puts "Pushing views from directory #{directory} to database #{fm.db}"
        fm.push_views(directory)
      end

      # Returns the help text for the push command.
      def self.help
        unindent(<<-GEN)

          == Pushing views with Couchview ==

          Usage: couchview push directory dbname

          Couchview expects a specific filesystem layout for your CouchDB views (see
          example below). It also supports advanced features like inlining of library
          code (so you can keep DRY) as well as avoiding unnecessary document
          modification.

          Couchview also solves a problem with CouchDB's view API, which only provides
          access to the final reduce side of any views which have both a map and a
          reduce function defined. The intermediate map results are often useful for
          development and production. CouchDB is smart enough to reuse map indexes for
          functions duplicated across views within the same design document.

          For views with a reduce function defined, Couchview creates both a reduce view
          and a map-only view, so that you can browse and query the map side as well as
          the reduction, with no performance penalty.

          == Example ==

            couchview push foo-project/bar-views baz-database

          This will push the views defined in foo-project/bar-views into a database
          called baz-database. Couchview expects the views to be defined in files with
          names like:

            foo-project/bar-views/my-design/viewname-map.js
            foo-project/bar-views/my-design/viewname-reduce.js
            foo-project/bar-views/my-design/noreduce-map.js

          Pushed to => http://localhost:5984/baz-database/_design/my-design

          And the design document:
            {
              "views" : {
                "viewname-map" : {
                  "map" : "### contents of viewname-map.js ###"
                },
                "viewname-reduce" : {
                  "map" : "### contents of viewname-map.js ###",
                  "reduce" : "### contents of viewname-reduce.js ###"
                },
                "noreduce-map" : {
                  "map" : "### contents of noreduce-map.js ###"
                }
              }
            }

          Couchview will create a design document for each subdirectory of the views
          directory specified on the command line.

          == Library Inlining ==

          Couchview can optionally inline library code into your views so you only have
          to maintain it in one place. It looks for any files named lib.* in your
          design-doc directory (for doc specific libs) and in the parent views directory
          (for project global libs). These libraries are only inserted into views which
          include the text

            //include-lib

          or

            #include-lib

          Couchview is a result of scratching my own itch. I'd be happy to make it more
          general, so please contact me at jchris@grabb.it if you'd like to see anything
          added or changed.

        GEN
      end

      # Removes the indentation shared by all non-blank lines of +text+, so the
      # help text does not depend on how deeply the heredoc is nested here.
      def self.unindent(text)
        indent = text.scan(/^[ \t]*(?=\S)/).map { |lead| lead.length }.min || 0
        text.gsub(/^[ \t]{#{indent}}/, '')
      end
      private_class_method :unindent

    end


  end

end
@@ -0,0 +1,105 @@
1
+ require 'cgi'
2
+ require "base64"
3
+
4
module CouchRest
  # A single database on a CouchDB server. Every method issues one HTTP request
  # below the database root URL ("<server uri>/<name>") and returns the
  # response (parsed JSON, except for fetch_attachment).
  class Database
    attr_reader :server, :host, :name, :root

    # server - object responding to #uri (and, for save/bulk_save, #next_uuid
    #          and #uuid_batch_count)
    # name   - database name, appended to the server uri to form the root URL
    def initialize(server, name)
      @name = name
      @server = server
      @host = server.uri
      @root = "#{host}/#{name}"
    end

    # The database root URL.
    def to_s
      @root
    end

    # GETs the database root.
    def info
      CouchRest.get @root
    end

    # GETs _all_docs, with optional query params.
    def documents(params = nil)
      url = CouchRest.paramify_url "#{@root}/_all_docs", params
      CouchRest.get url
    end

    # POSTs +funcs+ as JSON to _temp_view, with optional query params.
    def temp_view(funcs, params = nil)
      url = CouchRest.paramify_url "#{@root}/_temp_view", params
      JSON.parse(RestClient.post(url, funcs.to_json, {"Content-Type" => 'application/json'}))
    end

    # GETs the view +name+ (e.g. "design/view"), with optional query params.
    def view(name, params = nil)
      url = CouchRest.paramify_url "#{@root}/_view/#{name}", params
      CouchRest.get url
    end

    # experimental
    def search(params = nil)
      url = CouchRest.paramify_url "#{@root}/_search", params
      CouchRest.get url
    end

    # experimental
    def action(action, params = nil)
      url = CouchRest.paramify_url "#{@root}/_action/#{action}", params
      CouchRest.get url
    end

    # GETs the document with the given id (the id is CGI-escaped).
    def get(id)
      slug = CGI.escape(id)
      CouchRest.get "#{@root}/#{slug}"
    end

    # GETs the attachment +name+ of document id +doc+ and returns the raw
    # RestClient response (not parsed as JSON).
    def fetch_attachment(doc, name)
      doc = CGI.escape(doc)
      name = CGI.escape(name)
      RestClient.get "#{@root}/#{doc}/#{name}"
    end

    # PUTs +doc+ under its '_id'. When the document has no '_id', one is taken
    # from the server's next_uuid and stored in the document. Non-stub entries
    # of '_attachments' get their 'data' base64-encoded in place before sending.
    def save(doc)
      if doc['_attachments']
        doc['_attachments'] = encode_attachments(doc['_attachments'])
      end
      if doc['_id']
        slug = CGI.escape(doc['_id'])
      else
        slug = doc['_id'] = @server.next_uuid
      end
      CouchRest.put "#{@root}/#{slug}", doc
    end

    # POSTs all +docs+ to _bulk_docs in one request. Documents without an
    # '_id' are assigned one from the server's next_uuid first.
    def bulk_save(docs)
      noids = docs.reject { |d| d['_id'] }
      uuid_count = [noids.length, @server.uuid_batch_count].max
      noids.each do |doc|
        doc['_id'] = @server.next_uuid(uuid_count)
      end
      CouchRest.post "#{@root}/_bulk_docs", {:docs => docs}
    end

    # DELETEs the document identified by doc['_id'] at revision doc['_rev'].
    # Both values are escaped before being placed in the URL.
    def delete(doc)
      slug = CGI.escape(doc['_id'])
      rev = CGI.escape(doc['_rev'].to_s)
      CouchRest.delete "#{@root}/#{slug}?rev=#{rev}"
    end

    # DELETEs the whole database.
    def delete!
      CouchRest.delete @root
    end

    private

    # Base64-encodes the 'data' of every attachment that is not marked as a
    # stub. The hash is modified in place and returned.
    def encode_attachments(attachments)
      attachments.each do |k,v|
        next if v['stub']
        v['data'] = base64(v['data'])
      end
      attachments
    end

    # Base64 without any line breaks or other whitespace.
    def base64(data)
      Base64.strict_encode64(data)
    end
  end
end