filecamo 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +1 -0
- data/bin/filecamo +84 -0
- data/filecamo.gempec +15 -0
- data/lib/filecamo.rb +6 -0
- data/lib/filecamo/generator.rb +121 -0
- data/lib/filecamo/text_mucker.rb +139 -0
- metadata +93 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: cc867e4a005a911f06b95fc7a483ab8321a517c3
|
4
|
+
data.tar.gz: 8229c95f127d9e938ff75ea31c24a817a30099d3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2a1af293cb02a0e77998337a2c75e348f0578c3abcedf93c421ea7de885ec207141112f2465dc35ed52e7be08ed8d32f042002ae10c5c4a2d37b73341996d8fc
|
7
|
+
data.tar.gz: 3f3a3363e9d00aa1006e3174ff700d643449bdf13541a892ba65f274004736d1fa3ecb9fc07ff700afd8b2afe87cf741ff9580a5db37898ae0d53072d9e86952
|
data/README.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# filecamo
|
data/bin/filecamo
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/filecamo'
|
4
|
+
|
5
|
+
# Command-line front end for Filecamo.
#
#   gen  - generate a tree of random text/binary files
#   muck - insert random comment lines into existing text files
#
# Any other (or missing) sub-command prints a short usage message and exits
# with status 1.

# Program name shown in usage output and embedded in inserted comment lines.
BN = File.basename($0, '.rb')

cmd = ARGV.shift

case cmd
when 'gen'
  # The optional "-d <destination_path>" pair may appear anywhere in the
  # argument list; it is removed so only positional arguments remain.
  dst_path = nil
  if (i = ARGV.index('-d'))
    ARGV.delete_at(i)            # drop the flag itself
    dst_path = ARGV.delete_at(i) # its value has shifted into the same slot
  end

  # min, max, count and depth are all mandatory; only percent_text is optional.
  if ARGV.size < 4
    bin, bm = BetterBytes::BINARY
    dec, dm = BetterBytes::DECIMAL

    $stderr.puts <<EOF

usage: #{BN} gen [-d <destination_path>] <minimum_size> <maximum_size> <count> <depth> [<percent_text>]

The size MAY end with a unit label:
* BINARY labels (#{bm} multiplier): #{bin.join(', ')}
* DECIMAL labels (#{dm} multiplier): #{dec.join(', ')}

The count will determine the number of files to generate.

The depth will determine the hierarchy of the files. The number of files will be randomly
distributed among the tree.

The percent text optional value indicates how many files should be text instead of binary.
Default is none (i.e. all files have random binary content).

EOF
    exit 1
  end

  min, max, count, depth, percent_text = ARGV
  generator = Filecamo::Generator.new
  generator.generate(min, max, count, depth,
                     percent_text: percent_text, destination_path: dst_path) do |path, len|
    puts "#{BetterBytes.humanize(len, precision: 6.1)} => #{path}"
  end

  # Redraw a single status line (carriage return, no newline) while the
  # background workers finish; trailing spaces erase leftovers of longer text.
  generator.wait do |txt_count, bin_count|
    # print, not printf: the message is not a format string.
    $stdout.print "\rWaiting for #{txt_count} txt and #{bin_count} bin jobs "\
                  "to complete..." + (' ' * 10)
    $stdout.flush
  end
  $stdout.puts
  generator.kill

when 'muck'
  # percent_select, percent_change and at least one path are required.
  if ARGV.size < 3
    $stderr.puts <<EOF

usage: #{BN} muck <percent_select> <percent_change> <path> [<path>...]

The percent select should be a value from 1 to 100 indicating how likely a given file will be
modified.

The percent change should be a value from 1 to 100 indicating (roughly) how many lines should
be added.

Paths can be filenames or directories to recurse and will be descended breadth-first. Any file
entry starting with '.' is skipped (ignored).

EOF
    exit 1
  end

  percent_select, percent_change, *paths = ARGV
  mucker = Filecamo::TextMucker.new(" #{BN}: ")
  mucker.muck(percent_select, percent_change, paths) do |fn, lang, line_nums|
    puts "Modified #{fn} (#{lang}): #{line_nums.join(',')}"
  end

else
  $stderr.puts <<EOF

usage: #{BN} { gen | muck } ...

EOF
  exit 1
end
|
data/filecamo.gempec
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Packaging manifest for the filecamo gem.
Gem::Specification.new do |spec|
  spec.name        = 'filecamo'
  # The version is scraped from lib/filecamo.rb: the first single-quoted string
  # found on a line that mentions VERSION.
  spec.version     = File.foreach('lib/filecamo.rb')
                         .grep(/VERSION/) { |line| line.match(/'([^']+)'/)[1] }
                         .first
  spec.summary     = 'File content generator and manipulator.'
  spec.description = 'Generate and manipulate entire directory trees of either binary or text file content.'
  spec.authors     = ['Brad Robel-Forrest']
  spec.email       = 'brad@bitpony.com'
  spec.homepage    = 'https://github.com/bradrf/filecamo#readme'
  spec.license     = 'MIT'

  # Package everything tracked by git (NUL-separated so odd file names survive).
  spec.files       = `git ls-files -z`.split("\x0")
  spec.executables = ['filecamo']

  spec.add_runtime_dependency 'better_bytes', '~> 0.0.1'
  spec.add_runtime_dependency 'literate_randomizer', '~> 0.4.0'
  spec.add_runtime_dependency 'ruby-filemagic', '~> 0.7.1'
end
|
data/lib/filecamo.rb
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'thread'
|
3
|
+
require 'thwait'
|
4
|
+
require 'pathname'
|
5
|
+
require 'better_bytes'
|
6
|
+
require 'literate_randomizer'
|
7
|
+
|
8
|
+
module Filecamo
  # Generates a directory tree of files filled with random content.
  #
  # File paths and sizes are chosen on the calling thread (#generate) and the
  # actual writing is handed to two pools of background threads: one producing
  # random sentences (".txt") and one producing random bytes (".bin").
  #
  # Typical use:
  #
  #   generator = Filecamo::Generator.new
  #   generator.generate('1KB', '4KB', 10, 2, percent_text: 50)
  #   generator.wait   # block until every queued file is fully written
  #   generator.kill   # stop the worker threads
  class Generator
    # Largest slice of random bytes written to a binary file in one call.
    BIN_CHUNK_SIZE = 32768

    # logger::      receives a report whenever a worker fails to write a file.
    # txt_workers:: Hash with :count (threads) and :queue_size (maximum
    #               queued jobs) for the text pool.
    # bin_workers:: same, for the binary pool.
    def initialize(logger: Logger.new($stdout),
                   txt_workers: {count: 4, queue_size: 5000},
                   bin_workers: {count: 4, queue_size: 5000})
      @gen = Random.new
      @logger = logger
      @words_generated = {}

      # Jobs queued but not yet completely written, per kind (:txt / :bin).
      # Must exist before any worker thread is started.
      @pending_lock = Mutex.new
      @pending = Hash.new(0)

      @txt_work_q = SizedQueue.new(txt_workers[:queue_size])
      @txt_workers = start_workers(:txt, @txt_work_q, txt_workers[:count]) do |file, len|
        while len > 0
          line = LiterateRandomizer.sentence + $/
          line.slice!(len..-1) # trim the final line so the file is not overshot
          len -= file.write(line)
        end
      end

      @bin_work_q = SizedQueue.new(bin_workers[:queue_size])
      @bin_workers = start_workers(:bin, @bin_work_q, bin_workers[:count]) do |file, len|
        while len > 0
          len -= file.write(@gen.bytes([len, BIN_CHUNK_SIZE].min))
        end
      end
    end

    # Queues +count+ files for creation beneath +destination_path+ (default:
    # the current directory).
    #
    # min, max::     size bounds, converted with BetterBytes.dehumanize.
    # count::        number of files to create.
    # depth::        upper bound used when picking how deep to nest each file;
    #                zero or less places every file directly in the destination.
    # percent_text:: 0..100, the share of files that should be text.
    #
    # Yields (path, length) for each file as it is queued; the file may not be
    # written yet at that point (see #wait).
    #
    # Raises ArgumentError when the minimum size exceeds the maximum.
    def generate(min, max, count, depth, percent_text: 0, destination_path: nil, &block)
      min = BetterBytes.dehumanize(min)
      max = BetterBytes.dehumanize(max)
      if min > max
        raise ArgumentError, "minimum size (#{min}) exceeds maximum size (#{max})"
      end

      feed_workers(min, max, count.to_i, depth.to_i, percent_text.to_f / 100,
                   Pathname.new(destination_path || '.'), &block)
    end

    # Blocks until every queued file has been completely written, polling every
    # +sleep_interval+ seconds. While work remains, yields the number of
    # unfinished text and binary jobs (queued or currently being written).
    def wait(sleep_interval: 0.3)
      loop do
        txt_count, bin_count = pending_counts
        break if txt_count.zero? && bin_count.zero?

        yield(txt_count, bin_count) if block_given?
        sleep(sleep_interval)
      end
    end

    # Stops all worker threads and waits for them to exit. Files still being
    # written are left incomplete, so call #wait first.
    def kill
      workers = @txt_workers + @bin_workers
      workers.each(&:kill)
      # Plain Thread#join; ThreadsWait (thwait) is no longer part of the
      # standard library as of Ruby 2.7.
      workers.each(&:join)
    end

    ######################################################################
    private

    # Starts +count+ threads that pop [length, pathname] jobs from +queue+ and
    # yield the opened file and the requested length to the given block.
    # A failure on one file is logged and the worker moves on to the next job.
    def start_workers(name, queue, count)
      Array.new(count) do
        Thread.new do
          loop do
            len, fn = queue.pop
            begin
              fn.open('wb') { |file| yield file, len }
            rescue StandardError => ex
              @logger.error "#{Thread.current} failed (#{name}) writing #{fn}"
              @logger.error ex
            ensure
              # Runs on success, on failure and when the thread is killed, so
              # #wait can never be left waiting on this job.
              job_done(name)
            end
          end
        end
      end
    end

    # Picks a path and length for each of +count+ files, creates the parent
    # directory and pushes the job onto the matching queue.
    def feed_workers(min, max, count, depth, percent_text, dst_pn)
      paths = {} # nesting level => directories already generated for it

      count.times do
        # Random#rand rejects a zero bound, so a non-positive depth means "flat".
        d = depth > 0 ? @gen.rand(depth) : 0
        if d < 1
          pn = Pathname.new('')
        else
          # Reuse a directory already recorded for this level when there is one.
          known = paths[d]
          pn = known && known[@gen.rand(known.size)]
          unless pn
            pn = Pathname.new(gen_name)
            d.times { pn += gen_name }
            (paths[d] ||= []) << pn
          end
        end

        ext = @gen.rand > percent_text ? '.bin' : '.txt'
        fn = dst_pn + pn + (gen_name + ext)
        fn.parent.mkpath
        len = @gen.rand(min..max).round

        yield(fn, len) if block_given?

        kind = ext == '.txt' ? :txt : :bin
        job_queued(kind) # count it before a worker can possibly finish it
        (kind == :txt ? @txt_work_q : @bin_work_q).push([len, fn])
      end
    end

    # Returns a random word, always using the capitalization that was first
    # generated for that word (compared case-insensitively).
    def gen_name
      word = LiterateRandomizer.word
      @words_generated[word.downcase] ||= word
    end

    # Records that a job of +kind+ has been queued.
    def job_queued(kind)
      @pending_lock.synchronize { @pending[kind] += 1 }
    end

    # Records that a job of +kind+ is finished (written, failed or aborted).
    def job_done(kind)
      @pending_lock.synchronize { @pending[kind] -= 1 }
    end

    # Returns [unfinished text jobs, unfinished binary jobs].
    def pending_counts
      @pending_lock.synchronize { [@pending[:txt], @pending[:bin]] }
    end
  end
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'filemagic'
|
3
|
+
require 'literate_randomizer'
|
4
|
+
|
5
|
+
module Filecamo
|
6
|
+
class TextMucker
|
7
|
+
MAX_FILE_SIZE = 128 * 1024
|
8
|
+
|
9
|
+
LANG_MARKS = {
|
10
|
+
csharp: '//',
|
11
|
+
python: '#',
|
12
|
+
ruby: '#',
|
13
|
+
shell: '#',
|
14
|
+
js: '#',
|
15
|
+
plain: '#',
|
16
|
+
}
|
17
|
+
|
18
|
+
# todo: fix parsable types, somethign like the following
|
19
|
+
# (figure out how to insert, perhaps deep iterater and counters?)
|
20
|
+
# (figure out types to insert, (i.e. would be bad to insert string into number array))
|
21
|
+
LANG_PARSERS = {
|
22
|
+
json: ->(fn){}
|
23
|
+
}
|
24
|
+
|
25
|
+
def initialize(comment_prefix, logger: Logger.new($stdout))
|
26
|
+
@marks = LANG_MARKS.clone
|
27
|
+
@marks.each_value{|m| m << comment_prefix}
|
28
|
+
@logger = logger
|
29
|
+
@magic = FileMagic.new
|
30
|
+
@mime = FileMagic.mime
|
31
|
+
end
|
32
|
+
|
33
|
+
def muck(percent_select, percent_lines, paths)
|
34
|
+
select_chance = percent_select.to_f / 100
|
35
|
+
lines_chance = percent_lines.to_f / 100
|
36
|
+
|
37
|
+
paths.each do |path|
|
38
|
+
path[0] == '.' and next
|
39
|
+
if File.directory?(path)
|
40
|
+
paths.concat(Dir.entries(path).map{|e| e[0] == '.' ? nil : File.join(path,e)}.compact)
|
41
|
+
next
|
42
|
+
end
|
43
|
+
|
44
|
+
fn = path
|
45
|
+
fn_size = File.size(fn)
|
46
|
+
|
47
|
+
# todo: support working with large files by reading next line
|
48
|
+
if fn_size > MAX_FILE_SIZE
|
49
|
+
@logger.debug "Skipping #{fn} by size: #{file.size}"
|
50
|
+
break
|
51
|
+
end
|
52
|
+
|
53
|
+
lang = case File.extname(fn)
|
54
|
+
when '.cs' then :csharp
|
55
|
+
when '.py' then :python
|
56
|
+
when '.js' then :js
|
57
|
+
when '.json' then :json
|
58
|
+
when '.yaml','meta' then :yaml
|
59
|
+
when '.html' then :html
|
60
|
+
when '.txt' then :plain
|
61
|
+
else
|
62
|
+
case m = @mime.file(fn)
|
63
|
+
when /python/ then :python
|
64
|
+
when /ruby/ then :ruby
|
65
|
+
when /shell/ then :shell
|
66
|
+
when /plain/
|
67
|
+
case g = @magic.file(fn)
|
68
|
+
when /python/ then :python
|
69
|
+
when /ruby/ then :ruby
|
70
|
+
when /node/ then :js
|
71
|
+
else
|
72
|
+
:plain
|
73
|
+
end
|
74
|
+
else
|
75
|
+
@logger.debug "Skipping #{fn} by mime type: #{m}"
|
76
|
+
next
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
if Random.rand > select_chance
|
81
|
+
@logger.debug "Skipping #{fn} by chance"
|
82
|
+
next
|
83
|
+
end
|
84
|
+
|
85
|
+
new_lines = {}
|
86
|
+
new_bytes_needed = (fn_size * lines_chance).floor
|
87
|
+
while new_bytes_needed > 0
|
88
|
+
offset = Random.rand(fn_size)
|
89
|
+
new_line = get_line_for(lang)
|
90
|
+
new_lines[offset] = new_line
|
91
|
+
new_bytes_needed -= new_line.bytesize
|
92
|
+
end
|
93
|
+
new_lines = new_lines.sort
|
94
|
+
|
95
|
+
body = ''
|
96
|
+
line_nums = []
|
97
|
+
|
98
|
+
File.open(fn) do |file|
|
99
|
+
line_num = 0
|
100
|
+
while !file.eof? && line = file.readline
|
101
|
+
body << line
|
102
|
+
line_num += 1
|
103
|
+
new_lines.empty? and next # read remainder of file
|
104
|
+
offset = new_lines[0][0]
|
105
|
+
if file.pos >= offset # add a line as soon as passed the offset
|
106
|
+
offset, new_line = new_lines.shift
|
107
|
+
line_nums << (line_num+=1)
|
108
|
+
body << new_line
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
# concat any remaining lines
|
113
|
+
if !new_lines.empty?
|
114
|
+
body[-1] == $/ or body << $/
|
115
|
+
new_lines.each do |offset, new_line|
|
116
|
+
line_num += 1
|
117
|
+
line_nums << line_num
|
118
|
+
body << new_line
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# todo: use same charset as mime type indicates when writing!
|
124
|
+
File.open(fn, 'wb') {|f| f.write(body)}
|
125
|
+
|
126
|
+
block_given? and yield(fn, lang, line_nums)
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
######################################################################
|
131
|
+
private
|
132
|
+
|
133
|
+
def get_line_for(lang)
|
134
|
+
mark = @marks[lang] or return ''
|
135
|
+
# todo: match line endings of file!
|
136
|
+
return mark + LiterateRandomizer.sentence + $/
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
metadata
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: filecamo
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Brad Robel-Forrest
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-08-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: better_bytes
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.0.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.0.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: literate_randomizer
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.4.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.4.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: ruby-filemagic
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.7.1
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.7.1
|
55
|
+
description: Generate and manipulate entire directory trees of either binary or text
|
56
|
+
file content.
|
57
|
+
email: brad@bitpony.com
|
58
|
+
executables:
|
59
|
+
- filecamo
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- README.md
|
64
|
+
- bin/filecamo
|
65
|
+
- filecamo.gempec
|
66
|
+
- lib/filecamo.rb
|
67
|
+
- lib/filecamo/generator.rb
|
68
|
+
- lib/filecamo/text_mucker.rb
|
69
|
+
homepage: https://github.com/bradrf/filecamo#readme
|
70
|
+
licenses:
|
71
|
+
- MIT
|
72
|
+
metadata: {}
|
73
|
+
post_install_message:
|
74
|
+
rdoc_options: []
|
75
|
+
require_paths:
|
76
|
+
- lib
|
77
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: '0'
|
87
|
+
requirements: []
|
88
|
+
rubyforge_project:
|
89
|
+
rubygems_version: 2.5.1
|
90
|
+
signing_key:
|
91
|
+
specification_version: 4
|
92
|
+
summary: File content generator and manipulator.
|
93
|
+
test_files: []
|