benford 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ group :development do
9
+ gem "rspec", "~> 2.3.0"
10
+ gem "bundler", "~> 1.0.0"
11
+ gem "jeweler", "~> 1.6.4"
12
+ gem "rcov", ">= 0"
13
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,28 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.3)
5
+ git (1.2.5)
6
+ jeweler (1.6.4)
7
+ bundler (~> 1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ rake (0.9.2)
11
+ rcov (0.9.10)
12
+ rspec (2.3.0)
13
+ rspec-core (~> 2.3.0)
14
+ rspec-expectations (~> 2.3.0)
15
+ rspec-mocks (~> 2.3.0)
16
+ rspec-core (2.3.1)
17
+ rspec-expectations (2.3.0)
18
+ diff-lcs (~> 1.1.2)
19
+ rspec-mocks (2.3.0)
20
+
21
+ PLATFORMS
22
+ ruby
23
+
24
+ DEPENDENCIES
25
+ bundler (~> 1.0.0)
26
+ jeweler (~> 1.6.4)
27
+ rcov
28
+ rspec (~> 2.3.0)
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Tijmen Brommet
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,5 @@
1
+ = benford
2
+
3
+ A little tool to check whether a dataset conforms to {Benford's Law}[http://en.wikipedia.org/wiki/Benford's_law].
4
+
5
+ Usage: <tt>benford path_to_file</tt>
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "benford"
18
+ gem.homepage = "http://github.com/tijmenb/benford"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Check the distribution if first digits in a dataset}
21
+ gem.description = %Q{Check if a dataset conforms to Benford's Law. More info: http://en.wikipedia.org/wiki/Benford's_law}
22
+ gem.email = "tijmen@gmail.com"
23
+ gem.authors = ["Tijmen Brommet"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rspec/core'
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec) do |spec|
31
+ spec.pattern = FileList['spec/**/*_spec.rb']
32
+ end
33
+
34
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
35
+ spec.pattern = 'spec/**/*_spec.rb'
36
+ spec.rcov = true
37
+ end
38
+
39
+ task :default => :spec
40
+
41
+ require 'rake/rdoctask'
42
+ Rake::RDocTask.new do |rdoc|
43
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
44
+
45
+ rdoc.rdoc_dir = 'rdoc'
46
+ rdoc.title = "benford #{version}"
47
+ rdoc.rdoc_files.include('README*')
48
+ rdoc.rdoc_files.include('lib/**/*.rb')
49
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
data/benford.gemspec ADDED
@@ -0,0 +1,62 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{benford}
8
+ s.version = "0.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = [%q{Tijmen Brommet}]
12
+ s.date = %q{2011-09-20}
13
+ s.description = %q{Check if a dataset conforms to Benford's Law. More info: http://en.wikipedia.org/wiki/Benford's_law}
14
+ s.email = %q{tijmen@gmail.com}
15
+ s.executables = [%q{benford}]
16
+ s.extra_rdoc_files = [
17
+ "LICENSE.txt",
18
+ "README.rdoc"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ ".rspec",
23
+ "Gemfile",
24
+ "Gemfile.lock",
25
+ "LICENSE.txt",
26
+ "README.rdoc",
27
+ "Rakefile",
28
+ "VERSION",
29
+ "benford.gemspec",
30
+ "bin/benford",
31
+ "lib/benford.rb",
32
+ "spec/benford_spec.rb",
33
+ "spec/spec_helper.rb"
34
+ ]
35
+ s.homepage = %q{http://github.com/tijmenb/benford}
36
+ s.licenses = [%q{MIT}]
37
+ s.require_paths = [%q{lib}]
38
+ s.rubygems_version = %q{1.8.5}
39
+ s.summary = %q{Check the distribution if first digits in a dataset}
40
+
41
+ if s.respond_to? :specification_version then
42
+ s.specification_version = 3
43
+
44
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
+ s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
46
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
47
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
48
+ s.add_development_dependency(%q<rcov>, [">= 0"])
49
+ else
50
+ s.add_dependency(%q<rspec>, ["~> 2.3.0"])
51
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
52
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
53
+ s.add_dependency(%q<rcov>, [">= 0"])
54
+ end
55
+ else
56
+ s.add_dependency(%q<rspec>, ["~> 2.3.0"])
57
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
58
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
59
+ s.add_dependency(%q<rcov>, [">= 0"])
60
+ end
61
+ end
62
+
data/bin/benford ADDED
@@ -0,0 +1,26 @@
1
#!/usr/bin/env ruby

# Command-line front end for the Benford class.
#
# Reads the file named by the first argument, extracts every number-like
# token and prints, for each leading digit 1-9, the observed count, the
# observed share, the share predicted by Benford's Law and the difference
# between the two.

require 'pp'
# Resolve the library relative to this script so it loads regardless of the
# current working directory or whether '.' is on the load path.
require File.expand_path('../../lib/benford', __FILE__)

class Float
  # Formats a fraction as a percentage with one decimal place,
  # e.g. 0.301.perc => "30.1%".
  def perc
    # '%%' is the literal percent sign; a lone trailing '%' is an incomplete
    # format specifier and raises ArgumentError on Ruby 1.9 and later.
    "%.1f%%" % (self * 100)
  end
end

abort "Usage: benford path_to_file" if ARGV.empty?

beginning = Time.now

# The block form of File.open closes the handle even if reading fails.
raw = File.open(ARGV[0], "rb") { |file| file.read }

b = Benford.new
# Remove ',' for correct parsing of '123,456,78'. Dots are kept inside a token
# so that a decimal such as '3.14' stays one sample instead of becoming the
# two samples '3' and '14'.
data = raw.gsub(',', '').scan(/[\w.]+/)
b.load! data

puts ["digit", "count", "sample", "benford", "deviation"].join("\t\t")

(1..9).each do |d|
  digit = d.to_s # the result hashes are keyed by digit strings
  row = [digit, b.counts[digit], b.distribution[digit].perc, b.law[digit].perc, b.deviation[digit].perc]
  puts row.join("\t\t")
end

puts "-> Analysis of #{b.numbers.count} samples in #{Time.now - beginning} seconds"
data/lib/benford.rb ADDED
@@ -0,0 +1,63 @@
1
# Compares the leading-digit distribution of a set of numbers with the
# distribution predicted by Benford's Law.
#
# All result hashes are keyed by the digit as a one-character String
# ("1".."9").
class Benford

  # The numeric strings accepted by the most recent #load! call, with '.' and
  # ',' separators removed. Empty until #load! has been called.
  attr_reader :numbers

  def initialize
    # Start with an empty sample so the query methods work before #load!.
    @numbers = []
  end

  # Replaces the current sample with the numeric entries of +nums+.
  #
  # nums - an Enumerable of values; each is converted with #to_s, so Strings
  #        and Numerics are both accepted. The values are never modified.
  #
  # Entries that are not numeric once '.' and ',' are removed are skipped, as
  # are entries whose value is zero (zero has no leading significant digit, so
  # counting it would only inflate the sample size).
  #
  # Returns +nums+.
  def load!(nums)
    # Results memoized for a previous sample must not leak into this one.
    @digit_counts = @dist = @variants = nil
    @numbers = []
    nums.each do |num|
      # String#delete returns a new string, leaving the caller's data intact
      # (and working for frozen strings).
      cleaned = num.to_s.delete(".,")
      next unless cleaned.is_numeric?
      next if cleaned.to_i.zero?
      @numbers << cleaned
    end
  end

  # Expected share of each leading digit: P(d) = log10(1 + 1/d).
  #
  # Returns a Hash of digit String => Float.
  def law
    @benford ||= (1..9).inject({}) do |table, d|
      table[d.to_s] = Math.log10(1 + 1 / d.to_f)
      table
    end
  end

  # Number of loaded entries per leading digit.
  #
  # Returns a Hash of digit String => Integer, defaulting to 0.
  def counts
    @digit_counts ||= numbers.inject(Hash.new(0)) do |tally, number|
      tally[number.first] += 1
      tally
    end
  end

  # Observed share of each leading digit in the loaded sample.
  #
  # Returns a Hash of digit String => Float, defaulting to 0.0.
  def distribution
    return @dist unless @dist.nil?
    total = numbers.count.to_f
    shares = Hash.new(0.0)
    counts.each { |digit, count| shares[digit] = count / total }
    @dist = shares
  end

  # Difference between the observed share and the share expected by
  # Benford's Law (observed - expected) for each digit "1".."9".
  #
  # Returns a Hash of digit String => Float, defaulting to 0.0.
  def deviation
    return @variants unless @variants.nil?
    diffs = Hash.new(0.0)
    law.each { |digit, expected| diffs[digit] = distribution[digit] - expected }
    @variants = diffs
  end

end


class String

  # First digit of the string's integer value, as a one-character String.
  # Leading zeros and the sign are ignored: "0123".first and "-123".first
  # are both "1". Strings with no leading integer yield "0".
  def first
    to_i.abs.to_s[0, 1]
  end

  # True when the whole string is an optionally signed integer or decimal.
  # \z (not \Z) is used so a trailing newline is not silently accepted.
  def is_numeric?
    !(self =~ /\A[+-]?\d+(\.\d+)?\z/).nil?
  end
end
@@ -0,0 +1,65 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe Benford do
4
+
5
+ describe "after loading" do
6
+
7
+ before(:each) do
8
+ @b = Benford.new
9
+ end
10
+
11
+ it "counts the correct numbers" do
12
+ @b.load! ["6712", "1232"]
13
+ @b.numbers.count.should == 2
14
+ end
15
+
16
+ it "counts floats" do
17
+ @b.load! ["6712", "65123.1", "1232"]
18
+ @b.numbers.count.should == 3
19
+ end
20
+
21
+ it "strips strings" do
22
+ @b.load! ["6712345", "kjaadish", "2713678"]
23
+ @b.numbers.count.should == 2
24
+ end
25
+
26
+ it "converts a 1 < float into an integer" do
27
+ @b.load! ["0.123"]
28
+ @b.numbers.first.first.should == "1" # first.first is confusing, but alas
29
+ end
30
+
31
+ it "does not skip commas in numbers" do
32
+ @b.load! ["123,212.1"]
33
+ @b.numbers.count.should == 1
34
+ end
35
+
36
+ it "get the counts per number right" do
37
+ @b.load! ["18213", "2187356", "2131234", "2131234"]
38
+ @b.counts["1"].should == 1
39
+ @b.counts["2"].should == 3
40
+ end
41
+
42
+ it "gets the right distribution" do
43
+ @b.load! ["18213", "1187356", "2131234", "2131234"]
44
+ @b.distribution["1"].should == 0.5
45
+ @b.distribution["2"].should == 0.5
46
+ end
47
+
48
+ it "also get another distribution" do
49
+ @b.load! ["111", "111", "111", "222"]
50
+ @b.distribution["1"].should == 0.75
51
+ @b.distribution["2"].should == 0.25
52
+ end
53
+
54
+ it "and another distribution" do
55
+ @b.load! ["1"]
56
+ @b.distribution["1"].should == 1
57
+ end
58
+
59
+ it "gets the correct variant from the distribution" do
60
+ @b.load! ["111", "111", "222", "222"]
61
+ @b.deviation["1"].should be_between 0.19, 0.2
62
+ @b.deviation["2"].should be_between 0.32, 0.33
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'benford'
5
+
6
+ # Requires supporting files with custom matchers and macros, etc,
7
+ # in ./support/ and its subdirectories.
8
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
+
10
+ RSpec.configure do |config|
11
+
12
+ end
metadata ADDED
@@ -0,0 +1,139 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: benford
3
+ version: !ruby/object:Gem::Version
4
+ hash: 31
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 0
10
+ version: 0.0.0
11
+ platform: ruby
12
+ authors:
13
+ - Tijmen Brommet
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-09-20 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ type: :development
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ hash: 3
28
+ segments:
29
+ - 2
30
+ - 3
31
+ - 0
32
+ version: 2.3.0
33
+ prerelease: false
34
+ name: rspec
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ type: :development
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ hash: 23
44
+ segments:
45
+ - 1
46
+ - 0
47
+ - 0
48
+ version: 1.0.0
49
+ prerelease: false
50
+ name: bundler
51
+ version_requirements: *id002
52
+ - !ruby/object:Gem::Dependency
53
+ type: :development
54
+ requirement: &id003 !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ~>
58
+ - !ruby/object:Gem::Version
59
+ hash: 7
60
+ segments:
61
+ - 1
62
+ - 6
63
+ - 4
64
+ version: 1.6.4
65
+ prerelease: false
66
+ name: jeweler
67
+ version_requirements: *id003
68
+ - !ruby/object:Gem::Dependency
69
+ type: :development
70
+ requirement: &id004 !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ prerelease: false
80
+ name: rcov
81
+ version_requirements: *id004
82
+ description: "Check if a dataset conforms to Benford's Law. More info: http://en.wikipedia.org/wiki/Benford's_law"
83
+ email: tijmen@gmail.com
84
+ executables:
85
+ - benford
86
+ extensions: []
87
+
88
+ extra_rdoc_files:
89
+ - LICENSE.txt
90
+ - README.rdoc
91
+ files:
92
+ - .document
93
+ - .rspec
94
+ - Gemfile
95
+ - Gemfile.lock
96
+ - LICENSE.txt
97
+ - README.rdoc
98
+ - Rakefile
99
+ - VERSION
100
+ - benford.gemspec
101
+ - bin/benford
102
+ - lib/benford.rb
103
+ - spec/benford_spec.rb
104
+ - spec/spec_helper.rb
105
+ homepage: http://github.com/tijmenb/benford
106
+ licenses:
107
+ - MIT
108
+ post_install_message:
109
+ rdoc_options: []
110
+
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ hash: 3
119
+ segments:
120
+ - 0
121
+ version: "0"
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ">="
126
+ - !ruby/object:Gem::Version
127
+ hash: 3
128
+ segments:
129
+ - 0
130
+ version: "0"
131
+ requirements: []
132
+
133
+ rubyforge_project:
134
+ rubygems_version: 1.8.5
135
+ signing_key:
136
+ specification_version: 3
137
+ summary: Check the distribution if first digits in a dataset
138
+ test_files: []
139
+