embulk-input-apache-dummy-log 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5d5911d156d8aeb5be4d6abe440d9609d249ed9a
4
+ data.tar.gz: 7b50942c2c3f10a799c9457994a4cde7d06610c4
5
+ SHA512:
6
+ metadata.gz: cf35e7cbaf4969b8048433e362d2c9517b04383cd8815b5335a762051e11faaa23d1d333af28645fd27cf6ec049f3aa8468d4eea643db164fbbaa911e9a663ba
7
+ data.tar.gz: bc3b181d137a28f0f2580b7429277263be02c6d18cceb9df2afe0ed73589ea1086938834a78dff45e9fd91595a08252204d49b52deaa53ade69afc64cf8a0d1f
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,32 @@
1
+ # Apache Dummy Log input plugin for Embulk
2
+
3
+ This is an Embulk input plugin that generates dummy Apache combined-format log records.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: input
8
+ * **Load all or nothing**: yes
9
+ * **Resume supported**: no
10
+
11
+ ## Configuration
12
+
13
+ - **size**: number of log records to generate (integer, default: 100)
14
+
15
+ ## Example
16
+
17
+ ```yaml
18
+ in:
19
+ type: apache-dummy-log
20
+ size: 100
21
+ ```
22
+
23
+ ## Build
24
+
25
+ ```
26
+ $ rake
27
+ ```
28
+
29
+ ## Note
30
+
31
+ This code is based on
32
+ [sample_apache_gen.rb](https://github.com/treasure-data/td/blob/master/data/sample_apache_gen.rb) at treasure-data.
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,18 @@
1
+
2
# Gem specification for the "apache-dummy-log" Embulk input plugin.
#
# The packaged file list comes from `git ls-files` (so the gem is expected to
# be built from a git checkout) plus any jars found under classpath/.
Gem::Specification.new do |spec|
  spec.name          = "embulk-input-apache-dummy-log"
  spec.version       = "0.1.0"
  spec.authors       = ["Hiroyuki Sato"]
  spec.summary       = "Apache Dummy Log input plugin for Embulk"
  spec.description   = "Apache Dummy Log input plugin is an Embulk plugin that loads records from Apache Dummy Log so that any output plugins can receive the records. Search the output plugins by 'embulk-output' keyword."
  spec.email         = ["hiroysato@gmail.com"]
  spec.licenses      = ["MIT"]
  spec.homepage      = "https://github.com/hiroyuki-sato/embulk-input-apache-dummy-log"

  spec.files         = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
  spec.test_files    = spec.files.grep(%r{^(test|spec)/})
  spec.require_paths = ["lib"]

  # '~> 1.0' means ">= 1.0, < 2.0", which excludes Bundler 2.x; use an open
  # lower bound so the development bundle still resolves on newer Bundler.
  spec.add_development_dependency 'bundler', ['>= 1.0']
  spec.add_development_dependency 'rake', ['>= 10.0']
end
@@ -0,0 +1,250 @@
1
+ module Embulk
2
+ module Input
3
+
4
+ class ApacheDummyLogInputPlugin < InputPlugin
5
+ Plugin.register_input("apache-dummy-log", self)
6
+
7
+ RECORDS = 5000
8
+ HOSTS = RECORDS/4
9
+ PAGES = RECORDS/4
10
+
11
+ AGENT_LIST_STRING = <<END
12
+ Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3
13
+ Mozilla/5.0 (iPad; CPU OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3
14
+ Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)
15
+ Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)
16
+ Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)
17
+ Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)
18
+ Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)
19
+ Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
20
+ Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
21
+ Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
22
+ Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
23
+ Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7
24
+ Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7
25
+ Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11
26
+ Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11
27
+ Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11
28
+ Mozilla/5.0 (Windows NT 6.0; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
29
+ Mozilla/5.0 (Windows NT 6.0; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
30
+ Mozilla/5.0 (Windows NT 6.0; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
31
+ Mozilla/5.0 (Windows NT 6.0; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
32
+ Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11
33
+ Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11
34
+ Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.46 Safari/535.11
35
+ Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1
36
+ Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1
37
+ Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1
38
+ Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; YTB730; GTB7.2; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E; Media Center PC 6.0)
39
+ Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; YTB730; GTB7.2; EasyBits GO v1.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C)
40
+ Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; GTB7.2; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C)
41
+ Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; YTB730; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C)
42
+ Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; WOW64; Trident/4.0; GTB6; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30618; .NET4.0C)
43
+ Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; YTB720; GTB7.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)
44
+ Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; BTRS122159; GTB7.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; BRI/2)
45
+ END
46
+ AGENT_LIST = AGENT_LIST_STRING.split("\n")
47
+
48
# Weighted pool of page categories: a category listed with weight N appears
# N times in the resulting array, so picking a uniformly random index favours
# the heavier categories.
PAGE_CATEGORIES = [
  ['books',       3],
  ['electronics', 6],
  ['software',    4],
  ['games',       3],
  ['office',      2],
  ['cameras',     1],
  ['computers',   1],
  ['finance',     1],
  ['giftcards',   1],
  ['garden',      1],
  ['health',      1],
  ['music',       1],
  ['sports',      1],
  ['toys',        1],
  ['networking',  1],
  ['jewelry',     1]
].flat_map { |name, weight| Array.new(weight) { name.dup } }
79
+
80
# A simulated client host: a random IPv4-looking address plus a small pool of
# user agents drawn from AGENT_LIST.
class Host
  attr_reader :ip

  def initialize
    # The first octet is rounded down to a multiple of 4 and the second to a
    # multiple of 3; the last two are used as drawn (20..229).
    octets = [
      (grand(210) + 20) / 4 * 4,
      (grand(210) + 20) / 3 * 3,
      grand(210) + 20,
      grand(210) + 20
    ]
    @ip = octets.join('.')
    @agents = []
  end

  # Returns a random integer in 0...n from the shared RANDOM generator.
  def grand(n)
    RANDOM.rand(n)
  end

  # Returns a user agent string for this host.
  #
  # The first four calls each draw a fresh entry from AGENT_LIST and remember
  # it; later calls pick one of those four remembered entries at random.
  def agent
    return @agents[grand(4)] if @agents.size == 4

    picked = AGENT_LIST[grand(AGENT_LIST.size)]
    @agents << picked
    picked
  end
end
101
+
102
+
103
# A randomly generated page: request path, HTTP method, status code, response
# size and a list of candidate referers. Everything is fixed at construction
# time except #referer, which draws again on every call.
class Page
  attr_reader :path, :size, :method, :code

  # Returns a random integer in 0...n from the shared RANDOM generator.
  def grand(n)
    RANDOM.rand(n)
  end

  def initialize
    category = random_category
    item_id = grand(RECORDS)

    # Half of the time the search query gets a second, independently drawn
    # category appended.
    keywords = [category]
    keywords << random_category if grand(2) == 0
    query = keywords.map { |word| word[0].upcase + word[1..-1] }.join('+')

    search_path = "/search/?c=#{query}"
    google_ref = "http://www.google.com/search?ie=UTF-8&q=google&sclient=psy-ab&q=#{query}&oq=#{query}&aq=f&aqi=g-vL1&aql=&pbx=1&bav=on.2,or.r_gc.r_pw.r_qf.,cf.osb&biw=#{grand(5000)}&bih=#{grand(600)}"

    # Defaults shared by most page kinds; individual branches override them.
    @method = 'GET'
    @code = 200

    case grand(12)
    when 0..5
      # Category listing (6 in 12).
      @path = "/category/#{category}"
      @referers = [nil] * 7 + [google_ref]
    when 6
      # Paginated category listing (1 in 12).
      @path = "/category/#{category}?from=#{grand(3) * 10}"
      @referers = [search_path, "/category/#{category}"]
    when 7..10
      # Item page (4 in 12); 1 in 100 of these is a 404.
      @path = "/item/#{category}/#{item_id}"
      @referers = [search_path, search_path, google_ref, "/category/#{category}"]
      @code = 404 if grand(100) == 0
    when 11
      # Search request (1 in 12), issued as a POST.
      @path = search_path
      @referers = [nil]
      @method = 'POST'
    end

    @size = grand(100) + 40
  end

  # Returns one of this page's candidate referers (which may itself be nil),
  # or nil outright on roughly half of the calls.
  def referer
    return nil unless grand(2) == 0

    @referers[grand(@referers.size)]
  end

  private

  # Picks a random entry from PAGE_CATEGORIES.
  def random_category
    PAGE_CATEGORIES[grand(PAGE_CATEGORIES.size)]
  end
end
161
+
162
+ RANDOM = Random.new
163
+
164
# Builds the task hash and the output column list from the plugin config,
# then hands both to .resume with a count of 1.
#
# config  - object answering #param(name, type, default:); only "size" is read
#           (integer, default 100).
# control - block forwarded unchanged to .resume.
#
# Returns whatever .resume returns.
def self.transaction(config, &control)
  task = { "size" => config.param("size", :integer, default: 100) }

  # One [name, type] pair per output column, in output order; each column's
  # index is its position in this list.
  column_defs = [
    ["remote_host",    :string],
    ["identity_check", :string],
    ["user",           :string],
    ["datetime",       :timestamp],
    ["method",         :string],
    ["path",           :string],
    ["protocol",       :string],
    ["status",         :long],
    ["size",           :long],
    ["referer",        :string],
    ["user_agent",     :string]
  ]
  columns = column_defs.each_with_index.map do |(name, type), index|
    Column.new(index, name, type)
  end

  resume(task, columns, 1, &control)
end
186
+
187
# Yields (task, columns, count) to the given block and returns an empty
# next-config-diff hash. The block's return value is not used.
def self.resume(task, columns, count, &control)
  yield(task, columns, count)

  {}
end
193
+
194
# Prepares per-task state from @task:
#   @size  - number of records to emit, read from the task's "size" entry
#   @pages - @size freshly generated Page objects
#   @hosts - HOSTS freshly generated Host objects
#
# The @ip / @agents assignments that used to live here duplicated
# Host#initialize and were never read by this plugin, so they were dropped.
def init
  @size = @task["size"]

  @pages = @size.times.map { Page.new }
  @hosts = HOSTS.times.map { Host.new }
end
212
+
213
# Returns a random value from the shared RANDOM generator, bounded by n
# (for a positive Integer n this is an integer in 0...n).
def grand(n)
  RANDOM.rand(n)
end
216
+
217
# Emits @size dummy access-log records through page_builder, then finishes
# the builder and returns an empty commit report.
#
# Record timestamps start at the current time and advance by a random
# 0..4 seconds per record. Previously that running clock was computed but
# ignored, and every record was stamped with the wall-clock time instead.
def run
  clock = Time.now.to_i

  @size.times do
    clock += grand(5)
    page = @pages[grand(@pages.size)]
    host = @hosts[grand(@hosts.size)]

    # 1 in 10000 requests is reported as a server error regardless of page.
    status = grand(10000) == 0 ? 500 : page.code
    # Half of the time the referer is another random page's path; otherwise
    # the page's own referer is used. '-' stands in for "no referer".
    referer = (grand(2) == 0 ? @pages[grand(@pages.size)].path : page.referer) || '-'

    page_builder.add([
      host.ip,        # remote_host
      '-',            # identity_check
      '-',            # user
      Time.at(clock), # datetime
      page.method,    # method
      page.path,      # path
      'HTTP/1.1',     # protocol
      status,         # status
      page.size,      # size
      referer,        # referer
      host.agent      # user_agent
    ])
  end

  page_builder.finish

  {}
end
246
+
247
+
248
+ end
249
+ end
250
+ end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-input-apache-dummy-log
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Hiroyuki Sato
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: Apache Dummy Log input plugin is an Embulk plugin that loads records
42
+ from Apache Dummy Log so that any output plugins can receive the records. Search
43
+ the output plugins by 'embulk-output' keyword.
44
+ email:
45
+ - hiroysato@gmail.com
46
+ executables: []
47
+ extensions: []
48
+ extra_rdoc_files: []
49
+ files:
50
+ - ".gitignore"
51
+ - Gemfile
52
+ - LICENSE.txt
53
+ - README.md
54
+ - Rakefile
55
+ - embulk-input-apache-dummy-log.gemspec
56
+ - lib/embulk/input/apache-dummy-log.rb
57
+ homepage: https://github.com/hiroyuki-sato/embulk-input-apache-dummy-log
58
+ licenses:
59
+ - MIT
60
+ metadata: {}
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ requirements: []
76
+ rubyforge_project:
77
+ rubygems_version: 2.4.5
78
+ signing_key:
79
+ specification_version: 4
80
+ summary: Apache Dummy Log input plugin for Embulk
81
+ test_files: []
82
+ has_rdoc: