drip 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,6 +7,127 @@ class Drip
7
7
  include DRbUndumped
8
8
  def inspect; to_s; end
9
9
 
10
# Read-only, array-backed snapshot of drip entries.
#
# @pool is an array of [key, value] pairs sorted by key.
# @tag  is an array of [[tag, key], pool_index] rows sorted by [tag, key];
#       pool_index is the position of the entry inside @pool.
# All lookups are binary searches over these two sorted arrays.
class ImmutableDrip
  # Accumulates entries in any order and builds a consistent ImmutableDrip.
  class Generator
    def initialize
      @pool = []
      @tag = []
      # Returns the first equal object seen, so equal tags share one instance.
      @shared = Hash.new { |h, k| h[k] = k; k }
    end

    # Records +value+ under +key+ and indexes it under each distinct tag.
    # +value+ is expected to respond to to_a (readers splat value.to_a).
    def add(key, value, *tag)
      @pool << [key, value]
      idx = @pool.size - 1
      tag.uniq.each do |t|
        @tag << [[@shared[t], key], idx]
      end
    end

    # Builds the snapshot.
    #
    # The pool is sorted by key, so every tag row's pool index is translated
    # from insertion position to sorted position. Without that translation a
    # tag lookup returns the wrong entry whenever keys were added out of order.
    def generate
      # Insertion positions in key order; the position breaks ties so the
      # sort never has to compare values.
      order = (0...@pool.size).sort_by { |i| [@pool[i][0], i] }
      position = Array.new(@pool.size)
      order.each_with_index { |old_idx, new_idx| position[old_idx] = new_idx }
      pool = order.map { |i| @pool[i] }

      tag = @tag.map { |k, idx| [k.dup, position[idx]] }.sort
      # Make consecutive equal tags reference the same object.
      tag.inject(nil) do |last, kv|
        k = kv[0]
        k[0] = last if k[0] == last
        k[0]
      end
      ImmutableDrip.new(pool, tag)
    end
  end

  INF = 1.0/0

  # pool and tag must already be sorted as described on the class.
  def initialize(pool=[], tag=[])
    @pool = pool
    @tag = tag
  end

  # Returns value.to_a for the entry stored exactly at +key+, or nil.
  def fetch(key)
    k, v = @pool[lower_boundary(@pool, key)]
    k == key ? v.to_a : nil
  end

  # Returns up to +n+ entries with keys greater than +key+, oldest first,
  # each as [key, *value].
  def read(key, n=1)
    idx = lower_boundary(@pool, key + 1)
    @pool[idx, n].collect { |kv| [kv[0], *kv[1].to_a] }
  end

  # Like read, but walks the tag index: takes up to +n+ rows starting after
  # [tag, key] and keeps those whose tag equals +tag+.
  def read_tag(key, tag, n=1)
    idx = lower_boundary(@tag, [tag, key + 1])
    @tag[idx, n].find_all { |kv| kv[0][0] == tag }.collect { |kv| tagged_entry(kv) }
  end

  # Returns the newest +n+ entries carrying +tag+, oldest first.
  def head_tag(n, tag)
    lower = lower_boundary(@tag, [tag, 0])
    upper = upper_boundary(@tag, [tag, INF])
    lower = [lower, upper - n].max
    @tag[lower...upper].collect { |kv| tagged_entry(kv) }
  end

  # Returns the newest +n+ entries (restricted to +tag+ when given).
  def head(n=1, tag=nil)
    return head_tag(n, tag) if tag
    n = @pool.size < n ? @pool.size : n
    @pool[-n, n].collect { |kv| [kv[0], *kv[1].to_a] }
  end

  # Returns the newest entry with +tag+ whose key is below +key+, or nil.
  def older_tag(key, tag)
    idx = upper_boundary(@tag, [tag, key - 1])
    # idx == 0 means no row sorts at or below [tag, key - 1]. Indexing
    # @tag[-1] here would wrap to the last row and could return a newer entry.
    return nil if idx.zero?
    k, v = @tag[idx - 1]
    k[0] == tag ? [k[1], *@pool[v][1].to_a] : nil
  end

  # Returns the newest entry whose key is below +key+, or nil.
  # A nil +key+ means "older than anything newer than the last entry",
  # i.e. the last entry itself; on an empty pool the answer is nil.
  def older(key, tag=nil)
    unless key
      return nil if @pool.empty?
      key = @pool[-1][0] + 1
    end
    return older_tag(key, tag) if tag
    idx = upper_boundary(@pool, key - 1)
    return nil if idx.zero?
    k, v = @pool[idx - 1]
    k < key ? [k, *v.to_a] : nil
  end

  # Returns the first entry with a key above +key+ (restricted to +tag+ when
  # given), or nil.
  def newer(key, tag=nil)
    return read(key, 1)[0] unless tag
    read_tag(key, tag, 1)[0]
  end

  # Index of the first row of +ary+ whose first element is >= +key+
  # (ary.size when there is none).
  def lower_boundary(ary, key)
    lower = -1
    upper = ary.size
    while lower + 1 != upper
      mid = (lower + upper).div(2)
      if (ary[mid][0] <=> key) < 0
        lower = mid
      else
        upper = mid
      end
    end
    upper
  end

  # Index just past the last row of +ary+ whose first element is <= +key+
  # (0 when there is none).
  def upper_boundary(ary, key)
    lower = -1
    upper = ary.size
    while lower + 1 != upper
      mid = (lower + upper).div(2)
      if (ary[mid][0] <=> key) <= 0
        lower = mid
      else
        upper = mid
      end
    end
    lower + 1
  end

  private

  # Expands one tag row [[tag, key], pool_index] into [key, *value].
  def tagged_entry(row)
    [row[0][1], *@pool[row[1]][1].to_a]
  end
end
130
+
10
131
  def initialize(dir, option={})
11
132
  @pool = RBTree.new
12
133
  @tag = RBTree.new
@@ -15,20 +136,20 @@ class Drip
15
136
  prepare_store(dir, option)
16
137
  end
17
138
 
18
# Delegates to write_after, using Time.now as the timestamp.
# The 0.0.1 signature took a bare *value splat; the 0.0.2 signature names
# the stored object (obj) and its tags separately.
- def write(*value)
19
- write_after(Time.now, *value)
139
+ def write(obj, *tags)
140
+ write_after(Time.now, obj, *tags)
20
141
  end
21
142
 
22
143
  def write_after(at, *value)
23
144
  make_key(at) do |key|
24
- do_write(key, value)
145
+ value = do_write(key, value)
25
146
  @pool[key] = @store.write(key, value)
26
147
  end
27
148
  end
28
149
 
29
150
  def write_at(at, *value)
30
151
  make_key_at(at) do |key|
31
- do_write(key, value)
152
+ value = do_write(key, value)
32
153
  @pool[key] = @store.write(key, value)
33
154
  end
34
155
  end
@@ -197,15 +318,30 @@ class Drip
197
318
  end
198
319
 
199
320
  Dir.mkdir(dir) rescue nil
321
+ dump = Dir.glob(File.join(dir, '*.dump')).max_by do |fn|
322
+ File.basename(fn).to_i(36)
323
+ end
324
+ if dump
325
+ @pool, @tag, last = File.open(dump, 'rb') {|fp| Marshal.load(fp)}
326
+ @event.take([:last, nil])
327
+ @event.write([:last, last])
328
+ File.unlink(dump)
329
+ end
330
+ loaded = dump ? File.basename(dump).to_i(36) : 0
200
331
  Dir.glob(File.join(dir, '*.log')) do |fn|
332
+ next if loaded > File.basename(fn).to_i(36)
201
333
  begin
202
334
  store = SimpleStore.reader(fn)
203
335
  restore(store)
204
336
  rescue
205
337
  end
206
338
  end
207
- name = time_to_key(Time.now).to_s(36) + '.log'
208
- @store = SimpleStore.new(File.join(dir, name))
339
+ name = time_to_key(Time.now).to_s(36)
340
+ _, last = @event.read([:last, nil])
341
+ File.open(File.join(dir, name + '.dump'), 'wb') {|fp|
342
+ Marshal.dump([@pool, @tag, last], fp)
343
+ }
344
+ @store = SimpleStore.new(File.join(dir, name + '.log'))
209
345
  end
210
346
 
211
347
  def shared_text(str)
@@ -218,13 +354,14 @@ class Drip
218
354
  end
219
355
 
220
356
  # Registers an entry in the in-memory indexes.
  # 0.0.2 behaviour: value is split into the object and its tags, duplicate
  # tags are dropped, every String tag is recorded in @tag under
  # [shared_text(tag), key], and [obj] + tags is stored in @pool[key].
  # That array is also the return value (last expression). Non-String tags
  # are not indexed in @tag but stay in the stored array.
  def do_write(key, value)
221
- (1...value.size).each do |n|
222
- k = value[n]
357
+ obj, *tags = value
358
+ tags.uniq!
359
+ tags.each do |k|
223
360
  next unless String === k
224
361
  tag = shared_text(k)
225
362
  @tag[[tag, key]] = key
226
363
  end
227
- @pool[key] = value
364
+ @pool[key] = [obj] + tags
228
365
  end
229
366
 
230
367
  def restore(store)
@@ -1,3 +1,3 @@
1
1
  module Drip
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -37,6 +37,10 @@ def MyDrip.invoke
37
37
  end
38
38
  end
39
39
 
40
# Singleton method on MyDrip: a compact label built from @uri, so that
# inspecting the object does not dump its whole state.
def MyDrip.inspect
  "<MyDrip: #{@uri}>"
end
43
+
40
44
  class DripCursor
41
45
  def initialize(drip, bufsiz=10, at_least=10)
42
46
  @drip = drip
@@ -54,7 +54,7 @@ class CopoCopo
54
54
  @last, event = @drip.read_tag(@last, 'DripDemo Event', 1)[0]
55
55
  next if retweet?(event)
56
56
  next if mention?(event)
57
- next unless Time.now < created_at(event) + 60000
57
+ next unless Time.now < created_at(event) + 6000
58
58
  name = dig(event, 'user', 'screen_name')
59
59
  next unless @friends.include?(name)
60
60
  ary = extract(event['text'] || '')
@@ -0,0 +1,56 @@
1
+ require 'pp'
2
+ require 'my_drip'
3
+ require 'monitor'
4
+
5
# Scans a directory tree for Ruby sources and writes every file whose mtime
# is newer than the last recorded one into MyDrip.
class Crawler
  include MonitorMixin

  def initialize
    super()
    @root = File.expand_path('~/develop/git-repo/')
    @drip = MyDrip
    # Entries at or below the newest 'rbcrawl-begin' key are ignored.
    fence_key, = @drip.head(1, 'rbcrawl-begin')[0]
    @fence = fence_key || 0
  end

  # Returns the mtime stored with the newest entry for +fname+, or
  # Time.at(1) when there is none above the fence.
  def last_mtime(fname)
    key, doc, = @drip.head(1, 'rbcrawl-fname=' + fname)[0]
    return Time.at(1) unless doc && key > @fence
    doc[1]
  end

  # Crawls @root once (note: changes the process working directory) and
  # returns the list of file names that were written. The list is also
  # written to the drip with the 'rbcrawl-footprint' tag.
  def do_crawl
    synchronize do
      changed = []
      Dir.chdir(@root)
      Dir.glob('**/*.rb').each do |fname|
        mtime = File.mtime(fname)
        unless last_mtime(fname) >= mtime
          @drip.write([fname, mtime, File.read(fname)],
                      'rbcrawl', 'rbcrawl-fname=' + fname)
          changed << fname
        end
      end
      @drip.write(changed, 'rbcrawl-footprint')
      changed
    end
  end

  # Exits the process while holding the monitor, i.e. not in the middle of
  # a do_crawl run.
  def quit
    synchronize { exit(0) }
  end
end
43
+
44
# Script entry point: crawl once a minute on a background thread and quit
# as soon as gets returns.
if __FILE__ == $PROGRAM_NAME
  crawler = Crawler.new
  Thread.new {
    while true
      pp(crawler.do_crawl)
      sleep(60)
    end
  }

  gets
  crawler.quit
end
56
+
@@ -0,0 +1,71 @@
1
+ require 'index'
2
+ require 'crawl'
3
+ require 'webrick/cgi'
4
+ require 'erb'
5
+
6
# View for the demo search page. ERB::DefMethod compiles the inline template
# into the instance method to_html(word, list). Interpolated values go through
# h (from ERB::Util). The "search:" paragraph and the result list are only
# rendered when word is truthy; list is iterated with each.
+ class DemoListView
7
+ include ERB::Util
8
+ extend ERB::DefMethod
9
+ def_erb_method('to_html(word, list)', ERB.new(<<EOS))
10
+ <html><head><title>Demo UI</title></head><body>
11
+ <form method="post"><input type="text" name="w" value="<%=h word %>" /></form>
12
+ <% if word %>
13
+ <p>search: <%=h word %></p>
14
+ <ul>
15
+ <% list.each do |fname| %>
16
+ <li><%=h fname%></li>
17
+ <% end %>
18
+ </ul>
19
+ <% end %>
20
+ </body></html>
21
+ EOS
22
+ end
23
+
24
+ class DemoUICGI < WEBrick::CGI
25
+ def initialize(crawler, indexer, *args)
26
+ super(*args)
27
+ @crawler = crawler
28
+ @indexer = indexer
29
+ @list_view = DemoListView.new
30
+ end
31
+
32
+ def req_query(req, key)
33
+ value ,= req.query[key]
34
+ return nil unless value
35
+ value.force_encoding('utf-8')
36
+ value
37
+ end
38
+
39
+ def do_GET(req, res)
40
+ if req.path_info == '/quit'
41
+ Thread.new do
42
+ @crawler.quit
43
+ end
44
+ end
45
+ word = req_query(req, 'w') || ''
46
+ list = word.empty? ? [] : @indexer.dict.query(word)
47
+ res['content-type'] = 'text/html; charset=utf-8'
48
+ res.body = @list_view.to_html(word, list)
49
+ end
50
+
51
+ alias do_POST do_GET
52
+ end
53
+
54
# Script entry point: start the crawl loop and the indexer on background
# threads, then publish the CGI object over dRuby and wait for the DRb thread.
if __FILE__ == $PROGRAM_NAME
  crawler = Crawler.new
  Thread.new {
    while true
      pp(crawler.do_crawl)
      sleep(60)
    end
  }

  indexer = Indexer.new
  Thread.new { indexer.update_dict }

  front = DemoUICGI.new(crawler, indexer)
  DRb.start_service('druby://localhost:50830', front)
  DRb.thread.join
end
@@ -0,0 +1,69 @@
1
+ require 'index'
2
+ require 'crawl'
3
+ require 'webrick'
4
+ require 'erb'
5
+
6
# View for the demo search page. ERB::DefMethod compiles the inline template
# into the instance method to_html(word, list). Interpolated values go through
# h (from ERB::Util). The "search:" paragraph and the result list are only
# rendered when word is truthy; list is iterated with each.
+ class DemoListView
7
+ include ERB::Util
8
+ extend ERB::DefMethod
9
+ def_erb_method('to_html(word, list)', ERB.new(<<EOS))
10
+ <html><head><title>Demo UI</title></head><body>
11
+ <form method="post"><input type="text" name="w" value="<%=h word %>" /></form>
12
+ <% if word %>
13
+ <p>search: <%=h word %></p>
14
+ <ul>
15
+ <% list.each do |fname| %>
16
+ <li><%=h fname%></li>
17
+ <% end %>
18
+ </ul>
19
+ <% end %>
20
+ </body></html>
21
+ EOS
22
+ end
23
+
24
+ class DemoUIServlet < WEBrick::HTTPServlet::AbstractServlet
25
+ def initialize(server, crawler, indexer, list_view)
26
+ super(server)
27
+ @crawler = crawler
28
+ @indexer = indexer
29
+ @list_view = list_view
30
+ end
31
+
32
+ def req_query(req, key)
33
+ value ,= req.query[key]
34
+ return nil unless value
35
+ value.force_encoding('utf-8')
36
+ value
37
+ end
38
+
39
+ def do_GET(req, res)
40
+ word = req_query(req, 'w') || ''
41
+ list = word.empty? ? [] : @indexer.dict.query(word)
42
+ res['content-type'] = 'text/html; charset=utf-8'
43
+ res.body = @list_view.to_html(word, list)
44
+ end
45
+
46
+ alias do_POST do_GET
47
+ end
48
+
49
# Script entry point: start the crawl loop and the indexer on background
# threads, serve the UI on 127.0.0.1:10080 until INT shuts the server down,
# then ask the crawler to quit.
if __FILE__ == $PROGRAM_NAME
  crawler = Crawler.new
  Thread.new {
    while true
      pp(crawler.do_crawl)
      sleep(60)
    end
  }

  indexer = Indexer.new
  Thread.new { indexer.update_dict }

  config = {:Port => 10080,
            :BindAddress => '127.0.0.1'}
  server = WEBrick::HTTPServer.new(config)
  server.mount('/', DemoUIServlet, crawler, indexer, DemoListView.new)
  trap('INT') { server.shutdown }
  server.start
  crawler.quit
end
@@ -0,0 +1,96 @@
1
+ require 'nkf'
2
+ require 'rbtree'
3
+ require 'my_drip'
4
+ require 'monitor'
5
+ require 'pp'
6
+
7
+
8
# Follows the 'rbcrawl' entries in MyDrip and keeps a Dict in sync with them.
class Indexer
  attr_reader :dict

  # +cursor+ is the drip key to resume after; it is never allowed to fall
  # below the key of the newest 'rbcrawl-begin' entry (the fence).
  def initialize(cursor=0)
    @drip = MyDrip
    @dict = Dict.new
    fence_key, = @drip.head(1, 'rbcrawl-begin')[0]
    @fence = fence_key || 0
    @cursor = [cursor, @fence].max
  end

  # Never returns on its own: for every crawled document, removes the words
  # of its previous version (if any) and adds the words of the new one.
  def update_dict
    each_document do |current, previous|
      @dict.delete(*previous) if previous
      @dict.push(*current)
    end
  end

  # Endless loop that reads 'rbcrawl' entries after @cursor and yields
  # (document, previous_version_or_nil), advancing @cursor after each yield.
  def each_document
    while true
      batch = @drip.read_tag(@cursor, 'rbcrawl', 10, 1)
      batch.each do |key, doc|
        yield(doc, prev_version(key, doc[0]))
        @cursor = key
      end
    end
  end

  # Returns the document stored for +fname+ before +cursor+, or nil when
  # there is none above the fence.
  def prev_version(cursor, fname)
    key, doc = @drip.older(cursor, 'rbcrawl-fname=' + fname)
    doc if doc && key > @fence
  end
end
41
+
42
# Word index backed by an RBTree whose keys are [word, mtime.to_i, fname]
# (the value is always true). Mutations and queries run under the monitor.
class Dict
  include MonitorMixin

  def initialize
    super()
    @tree = RBTree.new
  end

  # Returns the file names of all keys between [word, 0, ''] and
  # [word + "\0", 0, ''], in tree order.
  def query(word)
    synchronize do
      first_key = [word, 0, '']
      last_key = [word + "\0", 0, '']
      @tree.bound(first_key, last_key).collect { |key, _| key[2] }
    end
  end

  # Removes every key derived from the given document version.
  def delete(fname, mtime, src)
    synchronize do
      each_tree_key(fname, mtime, src) { |key| @tree.delete(key) }
    end
  end

  # Adds one key per distinct word of the given document version.
  def push(fname, mtime, src)
    synchronize do
      each_tree_key(fname, mtime, src) { |key| @tree[key] = true }
    end
  end

  # Returns the word object already stored in the tree when an equal one
  # exists, otherwise +word+ itself.
  def intern(word)
    found, = @tree.lower_bound([word, 0, ''])
    found && found[0] == word ? found[0] : word
  end

  # Converts +src+ with NKF ('-w'), splits it into distinct \w+ words and
  # yields the tree key for each.
  def each_tree_key(fname, mtime, src)
    words = NKF.nkf('-w', src).scan(/\w+/m).uniq
    words.each do |word|
      yield([intern(word), mtime.to_i, fname])
    end
  end
end
83
+
84
# Script entry point: keep the dictionary updated on a background thread and
# answer one query per input line, printing the hits and their count.
if __FILE__ == $PROGRAM_NAME
  indexer ||= Indexer.new(0)
  Thread.new { indexer.update_dict }

  while line = gets
    hits = indexer.dict.query(line.chomp)
    pp(hits)
    pp(hits.size)
  end
end
96
+