drip 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,127 @@ class Drip
7
7
  include DRbUndumped
8
8
  def inspect; to_s; end
9
9
 
10
# Read-only, array-backed snapshot of drip entries.
#
# Two arrays, both kept sorted so they can be binary-searched:
# * pool: [[key, value], ...] ordered by key
# * tag:  [[[tag, key], pool_index], ...] ordered by [tag, key]
#
# Values are passed through #to_a when returned, so they are expected to be
# array-like (NOTE(review): presumably [obj, *tags] as stored by Drip --
# confirm against the writer).
class ImmutableDrip
  # Accumulates entries in any order and builds a consistent ImmutableDrip.
  class Generator
    def initialize
      @pool = []
      @tag = []
      # Maps every tag to the first equal instance seen, so equal tags
      # share a single object.
      @shared = Hash.new { |h, k| h[k] = k; k }
    end

    # Registers +value+ under +key+ and indexes it under each distinct tag.
    # The recorded index is the insertion position; #generate remaps it
    # after sorting.
    def add(key, value, *tag)
      @pool << [key, value]
      idx = @pool.size - 1
      tag.uniq.each do |t|
        @tag << [[@shared[t], key], idx]
      end
    end

    # Builds the ImmutableDrip.
    #
    # The pool is ordered by key only (insertion order breaks ties, so
    # values are never compared) and every tag entry's pool index is
    # translated from insertion position to sorted position. Without that
    # translation, entries added out of key order would resolve to the
    # wrong pool element.
    def generate
      order = (0...@pool.size).sort_by { |i| [@pool[i][0], i] }
      position = Array.new(@pool.size)
      order.each_with_index { |old_idx, new_idx| position[old_idx] = new_idx }
      pool = order.map { |i| @pool[i] }

      tag = @tag.map { |tag_key, idx| [tag_key, position[idx]] }.sort
      # Make runs of equal tags reference one shared object.
      last = nil
      tag.each do |tag_key, _|
        tag_key[0] = last if tag_key[0] == last
        last = tag_key[0]
      end
      ImmutableDrip.new(pool, tag)
    end
  end

  INF = 1.0/0

  # +pool+ and +tag+ must already be sorted as described on the class.
  def initialize(pool=[], tag=[])
    @pool = pool
    @tag = tag
  end

  # Returns the value stored under exactly +key+ (as an array), or nil.
  def fetch(key)
    k, v = @pool[lower_boundary(@pool, key)]
    k == key ? v.to_a : nil
  end

  # Returns up to +n+ entries whose key is greater than +key+, oldest
  # first, each as [key, *value].
  def read(key, n=1)
    idx = lower_boundary(@pool, key + 1)
    @pool[idx, n].map { |k, v| [k, *v.to_a] }
  end

  # Like #read, restricted to +tag+. Only the +n+ index entries following
  # the start position are inspected; those with another tag are dropped.
  def read_tag(key, tag, n=1)
    idx = lower_boundary(@tag, [tag, key + 1])
    @tag[idx, n].select { |kv| kv[0][0] == tag }.map { |kv| tag_entry(kv) }
  end

  # Returns the newest +n+ entries carrying +tag+, oldest first.
  def head_tag(n, tag)
    lower = lower_boundary(@tag, [tag, 0])
    upper = upper_boundary(@tag, [tag, INF])
    lower = [lower, upper - n].max
    @tag[lower...upper].map { |kv| tag_entry(kv) }
  end

  # Returns the newest +n+ entries (optionally only those with +tag+),
  # oldest first.
  def head(n=1, tag=nil)
    return head_tag(n, tag) if tag
    n = [@pool.size, n].min
    @pool[-n, n].map { |k, v| [k, *v.to_a] }
  end

  # Returns the newest entry with +tag+ whose key is below +key+, or nil.
  def older_tag(key, tag)
    idx = upper_boundary(@tag, [tag, key - 1])
    # idx == 0 means nothing sorts at or below [tag, key - 1]. Indexing
    # with -1 would wrap around to the last (newest) entry instead.
    return nil if idx.zero?
    k, v = @tag[idx - 1]
    k[0] == tag ? [k[1], *@pool[v][1].to_a] : nil
  end

  # Returns the newest entry whose key is below +key+, or nil.
  # A nil +key+ means "older than anything newer than the last entry",
  # i.e. the last entry itself. An empty pool always yields nil.
  def older(key, tag=nil)
    return nil if @pool.empty?
    key = @pool[-1][0] + 1 unless key
    return older_tag(key, tag) if tag
    idx = upper_boundary(@pool, key - 1)
    return nil if idx.zero?
    k, v = @pool[idx - 1]
    k < key ? [k, *v.to_a] : nil
  end

  # Returns the first entry after +key+ (optionally with +tag+), or nil.
  def newer(key, tag=nil)
    return read(key, 1)[0] unless tag
    read_tag(key, tag, 1)[0]
  end

  # Binary search: index of the first element of +ary+ whose first item
  # is >= +key+ (ary.size when there is none).
  def lower_boundary(ary, key)
    lower = -1
    upper = ary.size
    while lower + 1 != upper
      mid = (lower + upper).div(2)
      if (ary[mid][0] <=> key) < 0
        lower = mid
      else
        upper = mid
      end
    end
    upper
  end

  # Binary search: index of the first element of +ary+ whose first item
  # is > +key+ (ary.size when there is none).
  def upper_boundary(ary, key)
    lower = -1
    upper = ary.size
    while lower + 1 != upper
      mid = (lower + upper).div(2)
      if (ary[mid][0] <=> key) <= 0
        lower = mid
      else
        upper = mid
      end
    end
    lower + 1
  end

  private

  # Expands one tag-index element [[tag, key], pool_index] to [key, *value].
  def tag_entry(kv)
    [kv[0][1], *@pool[kv[1]][1].to_a]
  end
end
130
+
10
131
  def initialize(dir, option={})
11
132
  @pool = RBTree.new
12
133
  @tag = RBTree.new
@@ -15,20 +136,20 @@ class Drip
15
136
  prepare_store(dir, option)
16
137
  end
17
138
 
18
- def write(*value)
19
- write_after(Time.now, *value)
139
+ def write(obj, *tags)
140
+ write_after(Time.now, obj, *tags)
20
141
  end
21
142
 
22
143
  def write_after(at, *value)
23
144
  make_key(at) do |key|
24
- do_write(key, value)
145
+ value = do_write(key, value)
25
146
  @pool[key] = @store.write(key, value)
26
147
  end
27
148
  end
28
149
 
29
150
  def write_at(at, *value)
30
151
  make_key_at(at) do |key|
31
- do_write(key, value)
152
+ value = do_write(key, value)
32
153
  @pool[key] = @store.write(key, value)
33
154
  end
34
155
  end
@@ -197,15 +318,30 @@ class Drip
197
318
  end
198
319
 
199
320
  Dir.mkdir(dir) rescue nil
321
+ dump = Dir.glob(File.join(dir, '*.dump')).max_by do |fn|
322
+ File.basename(fn).to_i(36)
323
+ end
324
+ if dump
325
+ @pool, @tag, last = File.open(dump, 'rb') {|fp| Marshal.load(fp)}
326
+ @event.take([:last, nil])
327
+ @event.write([:last, last])
328
+ File.unlink(dump)
329
+ end
330
+ loaded = dump ? File.basename(dump).to_i(36) : 0
200
331
  Dir.glob(File.join(dir, '*.log')) do |fn|
332
+ next if loaded > File.basename(fn).to_i(36)
201
333
  begin
202
334
  store = SimpleStore.reader(fn)
203
335
  restore(store)
204
336
  rescue
205
337
  end
206
338
  end
207
- name = time_to_key(Time.now).to_s(36) + '.log'
208
- @store = SimpleStore.new(File.join(dir, name))
339
+ name = time_to_key(Time.now).to_s(36)
340
+ _, last = @event.read([:last, nil])
341
+ File.open(File.join(dir, name + '.dump'), 'wb') {|fp|
342
+ Marshal.dump([@pool, @tag, last], fp)
343
+ }
344
+ @store = SimpleStore.new(File.join(dir, name + '.log'))
209
345
  end
210
346
 
211
347
  def shared_text(str)
@@ -218,13 +354,14 @@ class Drip
218
354
  end
219
355
 
220
356
  def do_write(key, value)
221
- (1...value.size).each do |n|
222
- k = value[n]
357
+ obj, *tags = value
358
+ tags.uniq!
359
+ tags.each do |k|
223
360
  next unless String === k
224
361
  tag = shared_text(k)
225
362
  @tag[[tag, key]] = key
226
363
  end
227
- @pool[key] = value
364
+ @pool[key] = [obj] + tags
228
365
  end
229
366
 
230
367
  def restore(store)
@@ -1,3 +1,3 @@
1
1
  module Drip
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -37,6 +37,10 @@ def MyDrip.invoke
37
37
  end
38
38
  end
39
39
 
40
# Compact description of MyDrip that shows its @uri.
class << MyDrip
  def inspect
    "<MyDrip: #{@uri}>"
  end
end
43
+
40
44
  class DripCursor
41
45
  def initialize(drip, bufsiz=10, at_least=10)
42
46
  @drip = drip
@@ -54,7 +54,7 @@ class CopoCopo
54
54
  @last, event = @drip.read_tag(@last, 'DripDemo Event', 1)[0]
55
55
  next if retweet?(event)
56
56
  next if mention?(event)
57
- next unless Time.now < created_at(event) + 60000
57
+ next unless Time.now < created_at(event) + 6000
58
58
  name = dig(event, 'user', 'screen_name')
59
59
  next unless @friends.include?(name)
60
60
  ary = extract(event['text'] || '')
@@ -0,0 +1,56 @@
1
+ require 'pp'
2
+ require 'my_drip'
3
+ require 'monitor'
4
+
5
# Periodically invoked crawler that records new or modified Ruby files
# found under a root directory into a drip.
#
# Written tuples:
# * [fname, mtime, source] tagged 'rbcrawl' and 'rbcrawl-fname=<fname>'
# * the list of written file names tagged 'rbcrawl-footprint'
#
# Entries whose key is not greater than the newest 'rbcrawl-begin' entry
# (the "fence") are ignored when looking up a file's last recorded mtime.
class Crawler
  include MonitorMixin

  DEFAULT_ROOT = '~/develop/git-repo/'

  # root:: directory to crawl (expanded with File.expand_path).
  # drip:: object responding to head(n, tag) and write(obj, *tags);
  #        defaults to MyDrip.
  def initialize(root = DEFAULT_ROOT, drip = MyDrip)
    super()
    @root = File.expand_path(root)
    @drip = drip
    k, = @drip.head(1, 'rbcrawl-begin')[0]
    @fence = k || 0
  end

  # Returns the mtime recorded by the newest entry for +fname+ after the
  # fence, or Time.at(1) when there is none.
  def last_mtime(fname)
    k, v, = @drip.head(1, 'rbcrawl-fname=' + fname)[0]
    (v && k > @fence) ? v[1] : Time.at(1)
  end

  # Writes every *.rb file under the root whose mtime is newer than the
  # recorded one, then writes a footprint entry. Returns the list of
  # written file names (relative to the root).
  #
  # Files are located through absolute glob paths rather than Dir.chdir,
  # because chdir would change the working directory of every thread in
  # the process. NOTE: a root containing glob metacharacters is
  # interpreted as a pattern.
  def do_crawl
    synchronize do
      prefix = File.join(@root, '')
      written = []
      Dir.glob(File.join(@root, '**', '*.rb')).each do |path|
        fname = path[prefix.size..-1]
        mtime = File.mtime(path)
        next if last_mtime(fname) >= mtime
        @drip.write([fname, mtime, File.read(path)],
                    'rbcrawl', 'rbcrawl-fname=' + fname)
        written << fname
      end
      @drip.write(written, 'rbcrawl-footprint')
      written
    end
  end

  # Exits the process; taking the monitor first lets a running crawl
  # finish before exit.
  def quit
    synchronize do
      exit(0)
    end
  end
end
43
+
44
# Script entry point: crawl once a minute in the background until a line
# is read from standard input, then quit.
if __FILE__ == $0
  crawler = Crawler.new

  Thread.new do
    loop do
      pp crawler.do_crawl
      sleep 60
    end
  end

  gets
  crawler.quit
end
56
+
@@ -0,0 +1,71 @@
1
+ require 'index'
2
+ require 'crawl'
3
+ require 'webrick/cgi'
4
+ require 'erb'
5
+
6
# HTML view for the search demo.
#
# Defines #to_html(word, list) from the ERB template below. The page always
# contains the search form; the "search:" paragraph and the result list are
# rendered only for a non-empty word. (The previous `if word` guard was
# always true for the '' that callers pass when no query is given.)
# All interpolated values are HTML-escaped with ERB::Util#h.
class DemoListView
  include ERB::Util
  extend ERB::DefMethod
  def_erb_method('to_html(word, list)', ERB.new(<<EOS))
<html><head><title>Demo UI</title></head><body>
<form method="post"><input type="text" name="w" value="<%=h word %>" /></form>
<% unless word.to_s.empty? %>
<p>search: <%=h word %></p>
<ul>
<% list.each do |fname| %>
<li><%=h fname%></li>
<% end %>
</ul>
<% end %>
</body></html>
EOS
end
23
+
24
# CGI front end for the search demo: renders the search form and the files
# matching the submitted word; the '/quit' path additionally asks the
# crawler to quit.
class DemoUICGI < WEBrick::CGI
  # crawler:: object whose #quit is called for '/quit' requests.
  # indexer:: object whose #dict answers #query(word).
  # args::    forwarded unchanged to WEBrick::CGI#initialize.
  def initialize(crawler, indexer, *args)
    super(*args)
    @list_view = DemoListView.new
    @indexer = indexer
    @crawler = crawler
  end

  # Returns the first value of query parameter +key+ re-tagged as UTF-8,
  # or nil when the parameter is absent.
  def req_query(req, key)
    first, = req.query[key]
    first ? first.force_encoding('utf-8') : nil
  end

  # Handles GET (and, via the alias, POST) requests.
  def do_GET(req, res)
    # Quit from a separate thread so this response can still be built.
    Thread.new { @crawler.quit } if req.path_info == '/quit'

    word = req_query(req, 'w') || ''
    list =
      if word.empty?
        []
      else
        @indexer.dict.query(word)
      end
    res['content-type'] = 'text/html; charset=utf-8'
    res.body = @list_view.to_html(word, list)
  end

  alias do_POST do_GET
end
53
+
54
# Script entry point: start the crawl loop and the indexer in background
# threads, then publish the CGI object over dRuby and wait.
if __FILE__ == $0
  crawler = Crawler.new
  Thread.new do
    loop do
      pp crawler.do_crawl
      sleep 60
    end
  end

  indexer = Indexer.new
  Thread.new { indexer.update_dict }

  front = DemoUICGI.new(crawler, indexer)
  DRb.start_service('druby://localhost:50830', front)
  DRb.thread.join
end
@@ -0,0 +1,69 @@
1
+ require 'index'
2
+ require 'crawl'
3
+ require 'webrick'
4
+ require 'erb'
5
+
6
# HTML view for the WEBrick search demo.
#
# Defines #to_html(word, list) from the ERB template below. The page always
# contains the search form; the "search:" paragraph and the result list are
# rendered only for a non-empty word. (The previous `if word` guard was
# always true for the '' that callers pass when no query is given.)
# All interpolated values are HTML-escaped with ERB::Util#h.
class DemoListView
  include ERB::Util
  extend ERB::DefMethod
  def_erb_method('to_html(word, list)', ERB.new(<<EOS))
<html><head><title>Demo UI</title></head><body>
<form method="post"><input type="text" name="w" value="<%=h word %>" /></form>
<% unless word.to_s.empty? %>
<p>search: <%=h word %></p>
<ul>
<% list.each do |fname| %>
<li><%=h fname%></li>
<% end %>
</ul>
<% end %>
</body></html>
EOS
end
23
+
24
# WEBrick servlet for the search demo: renders the search form and the
# files matching the submitted word.
class DemoUIServlet < WEBrick::HTTPServlet::AbstractServlet
  # server::    passed to AbstractServlet#initialize.
  # crawler::   stored in @crawler (not used by the request handlers here).
  # indexer::   object whose #dict answers #query(word).
  # list_view:: object answering #to_html(word, list).
  def initialize(server, crawler, indexer, list_view)
    super(server)
    @list_view = list_view
    @indexer = indexer
    @crawler = crawler
  end

  # Returns the first value of query parameter +key+ re-tagged as UTF-8,
  # or nil when the parameter is absent.
  def req_query(req, key)
    first, = req.query[key]
    first ? first.force_encoding('utf-8') : nil
  end

  # Handles GET (and, via the alias, POST) requests.
  def do_GET(req, res)
    word = req_query(req, 'w') || ''
    list =
      if word.empty?
        []
      else
        @indexer.dict.query(word)
      end
    res['content-type'] = 'text/html; charset=utf-8'
    res.body = @list_view.to_html(word, list)
  end

  alias do_POST do_GET
end
48
+
49
# Script entry point: start the crawl loop and the indexer in background
# threads, serve the demo UI on 127.0.0.1:10080 until interrupted, then
# quit through the crawler.
if __FILE__ == $0
  crawler = Crawler.new
  Thread.new do
    loop do
      pp crawler.do_crawl
      sleep 60
    end
  end

  indexer = Indexer.new
  Thread.new { indexer.update_dict }

  options = {:Port => 10080, :BindAddress => '127.0.0.1'}
  server = WEBrick::HTTPServer.new(options)
  server.mount('/', DemoUIServlet, crawler, indexer, DemoListView.new)
  trap('INT') { server.shutdown }
  server.start
  crawler.quit
end
@@ -0,0 +1,96 @@
1
+ require 'nkf'
2
+ require 'rbtree'
3
+ require 'my_drip'
4
+ require 'monitor'
5
+ require 'pp'
6
+
7
+
8
# Follows 'rbcrawl' entries in MyDrip and keeps a Dict in step with them.
#
# Entries whose key is not greater than the newest 'rbcrawl-begin' entry
# (the "fence") are never treated as a previous version.
class Indexer
  attr_reader :dict

  # cursor:: key to start reading after; raised to the fence when lower.
  def initialize(cursor=0)
    @drip = MyDrip
    @dict = Dict.new
    newest_begin, = @drip.head(1, 'rbcrawl-begin')[0]
    @fence = newest_begin || 0
    @cursor = [cursor, @fence].max
  end

  # Applies every crawled document to the dictionary: removes the keys of
  # its previous version (if any), then adds the keys of the current one.
  # Does not return, because #each_document loops forever.
  def update_dict
    each_document do |current, previous|
      @dict.delete(*previous) if previous
      @dict.push(*current)
    end
  end

  # Endlessly reads 'rbcrawl' entries after the cursor and yields
  # (document, previous_version_or_nil) for each, advancing the cursor.
  def each_document
    while true
      batch = @drip.read_tag(@cursor, 'rbcrawl', 10, 1)
      batch.each do |key, doc|
        yield(doc, prev_version(key, doc[0]))
        @cursor = key
      end
    end
  end

  # Returns the value of the entry for +fname+ older than +cursor+, or nil
  # when there is none after the fence.
  def prev_version(cursor, fname)
    key, doc = @drip.older(cursor, 'rbcrawl-fname=' + fname)
    return nil unless doc && key > @fence
    doc
  end
end
41
+
42
# Monitor-protected word index kept in an RBTree.
#
# Tree keys are [word, mtime_as_integer, fname]; the stored value is true.
class Dict
  include MonitorMixin

  def initialize
    super()
    @tree = RBTree.new
  end

  # Returns the file names of all keys between [word, 0, ''] and
  # [word + "\0", 0, ''].
  def query(word)
    synchronize do
      from = [word, 0, '']
      to = [word + "\0", 0, '']
      @tree.bound(from, to).collect { |key, _| key[2] }
    end
  end

  # Removes every key derived from the given document.
  def delete(fname, mtime, src)
    synchronize do
      each_tree_key(fname, mtime, src) { |key| @tree.delete(key) }
    end
  end

  # Adds every key derived from the given document.
  def push(fname, mtime, src)
    synchronize do
      each_tree_key(fname, mtime, src) { |key| @tree[key] = true }
    end
  end

  # Returns the word instance already used as a tree key when an equal
  # word is found at the lower bound, otherwise +word+ itself.
  def intern(word)
    found, = @tree.lower_bound([word, 0, ''])
    found && found[0] == word ? found[0] : word
  end

  # Yields one [word, mtime.to_i, fname] key per distinct \w+ token of
  # +src+ after converting it with NKF ('-w').
  def each_tree_key(fname, mtime, src)
    words = NKF.nkf('-w', src).scan(/\w+/m).uniq
    words.each do |word|
      yield([intern(word), mtime.to_i, fname])
    end
  end
end
83
+
84
# Script entry point: index in the background and answer one query per
# line read from standard input, printing the hits and their count.
if __FILE__ == $0
  indexer = Indexer.new(0)
  Thread.new { indexer.update_dict }

  while (line = gets)
    hits = indexer.dict.query(line.chomp)
    pp hits
    pp hits.size
  end
end
96
+