benford 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ group :development do
9
+ gem "rspec", "~> 2.3.0"
10
+ gem "bundler", "~> 1.0.0"
11
+ gem "jeweler", "~> 1.6.4"
12
+ gem "rcov", ">= 0"
13
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,28 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.3)
5
+ git (1.2.5)
6
+ jeweler (1.6.4)
7
+ bundler (~> 1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ rake (0.9.2)
11
+ rcov (0.9.10)
12
+ rspec (2.3.0)
13
+ rspec-core (~> 2.3.0)
14
+ rspec-expectations (~> 2.3.0)
15
+ rspec-mocks (~> 2.3.0)
16
+ rspec-core (2.3.1)
17
+ rspec-expectations (2.3.0)
18
+ diff-lcs (~> 1.1.2)
19
+ rspec-mocks (2.3.0)
20
+
21
+ PLATFORMS
22
+ ruby
23
+
24
+ DEPENDENCIES
25
+ bundler (~> 1.0.0)
26
+ jeweler (~> 1.6.4)
27
+ rcov
28
+ rspec (~> 2.3.0)
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Tijmen Brommet
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,5 @@
1
+ = benford
2
+
3
+ A little tool to check whether a dataset conforms to {Benford's Law}[http://en.wikipedia.org/wiki/Benford's_law].
4
+
5
+ Usage: <tt>benford path_to_file</tt>
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "benford"
18
+ gem.homepage = "http://github.com/tijmenb/benford"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Check the distribution if first digits in a dataset}
21
+ gem.description = %Q{Check if a dataset conforms to Benford's Law. More info: http://en.wikipedia.org/wiki/Benford's_law}
22
+ gem.email = "tijmen@gmail.com"
23
+ gem.authors = ["Tijmen Brommet"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rspec/core'
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec) do |spec|
31
+ spec.pattern = FileList['spec/**/*_spec.rb']
32
+ end
33
+
34
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
35
+ spec.pattern = 'spec/**/*_spec.rb'
36
+ spec.rcov = true
37
+ end
38
+
39
+ task :default => :spec
40
+
41
+ require 'rake/rdoctask'
42
+ Rake::RDocTask.new do |rdoc|
43
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
44
+
45
+ rdoc.rdoc_dir = 'rdoc'
46
+ rdoc.title = "benford #{version}"
47
+ rdoc.rdoc_files.include('README*')
48
+ rdoc.rdoc_files.include('lib/**/*.rb')
49
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
data/benford.gemspec ADDED
@@ -0,0 +1,62 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{benford}
8
+ s.version = "0.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = [%q{Tijmen Brommet}]
12
+ s.date = %q{2011-09-20}
13
+ s.description = %q{Check if a dataset conforms to Benford's Law. More info: http://en.wikipedia.org/wiki/Benford's_law}
14
+ s.email = %q{tijmen@gmail.com}
15
+ s.executables = [%q{benford}]
16
+ s.extra_rdoc_files = [
17
+ "LICENSE.txt",
18
+ "README.rdoc"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ ".rspec",
23
+ "Gemfile",
24
+ "Gemfile.lock",
25
+ "LICENSE.txt",
26
+ "README.rdoc",
27
+ "Rakefile",
28
+ "VERSION",
29
+ "benford.gemspec",
30
+ "bin/benford",
31
+ "lib/benford.rb",
32
+ "spec/benford_spec.rb",
33
+ "spec/spec_helper.rb"
34
+ ]
35
+ s.homepage = %q{http://github.com/tijmenb/benford}
36
+ s.licenses = [%q{MIT}]
37
+ s.require_paths = [%q{lib}]
38
+ s.rubygems_version = %q{1.8.5}
39
+ s.summary = %q{Check the distribution if first digits in a dataset}
40
+
41
+ if s.respond_to? :specification_version then
42
+ s.specification_version = 3
43
+
44
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
+ s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
46
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
47
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
48
+ s.add_development_dependency(%q<rcov>, [">= 0"])
49
+ else
50
+ s.add_dependency(%q<rspec>, ["~> 2.3.0"])
51
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
52
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
53
+ s.add_dependency(%q<rcov>, [">= 0"])
54
+ end
55
+ else
56
+ s.add_dependency(%q<rspec>, ["~> 2.3.0"])
57
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
58
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
59
+ s.add_dependency(%q<rcov>, [">= 0"])
60
+ end
61
+ end
62
+
data/bin/benford ADDED
@@ -0,0 +1,26 @@
#!/usr/bin/env ruby
#
# Command-line front end for the Benford class.
#
# Reads the file named by the first argument, extracts every number-like
# token from it and prints, for each leading digit 1-9, the number of
# samples, their share of the data set, the share predicted by Benford's
# Law and the difference between the two.
#
# Usage: benford path_to_file

require 'pp'

# Load the library relative to this script. The current directory is not on
# the load path since Ruby 1.9.2, so a bare "lib/benford" cannot be relied on.
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
require 'benford'

path = ARGV[0]
abort "Usage: benford path_to_file" if path.nil?

# Formats a fraction (0.301) as a percentage ("30.1%"). The "%%" is a literal
# percent sign; a lone trailing "%" is an incomplete format specifier.
percent = lambda { |fraction| "%.1f%%" % (fraction * 100) }

beginning = Time.now

# The block form closes the file handle as soon as the content is read.
raw = File.open(path, "rb") { |file| file.read }

# Commas are removed for correct parsing of '123,456,78'. Dots stay part of
# the token so that "65123.1" is one sample rather than "65123" and "1".
data = raw.gsub(',', '').scan(/[\w.]+/)

b = Benford.new
b.load! data

puts ["digit", "count", "sample", "benford", "deviation"].join("\t\t")

(1..9).each do |digit|
  d = digit.to_s # the result hashes are keyed by digit strings
  row = [d, b.counts[d], percent.call(b.distribution[d]),
         percent.call(b.law[d]), percent.call(b.deviation[d])]
  puts row.join("\t\t")
end

puts "-> Analysis of #{b.numbers.count} samples in #{Time.now - beginning} seconds"
data/lib/benford.rb ADDED
@@ -0,0 +1,63 @@
# Compares the leading-digit distribution of a data set with the
# distribution predicted by Benford's Law.
#
#   b = Benford.new
#   b.load! %w[111 111 222 222]
#   b.distribution["1"]   # => 0.5
#   b.deviation["1"]      # => 0.5 - log10(2)
#
# All result hashes are keyed by one-character digit strings ("1".."9").
class Benford

  # A sample is accepted when, after removing "." and ",", it consists of an
  # optional sign followed by digits only.
  NUMERIC = /\A[+-]?\d+\z/

  # The accepted samples of the last #load!, as digit strings with "." and
  # "," removed. Empty until #load! has been called.
  attr_reader :numbers

  def initialize
    @numbers = []
  end

  # Replaces the current data set with +nums+.
  #
  # Each element is converted with +to_s+, stripped of surrounding
  # whitespace and of "." and ",", and kept only if the remainder is an
  # (optionally signed) run of digits. The elements of +nums+ themselves are
  # never modified, so frozen strings are fine.
  #
  # Every cached result is discarded, so #counts, #distribution and
  # #deviation always describe the most recently loaded data.
  #
  # Returns whatever <tt>nums.each</tt> returns (the array itself for an Array).
  def load!(nums)
    @digit_counts = @dist = @variants = nil
    @numbers = []
    nums.each do |num|
      cleaned = num.to_s.strip.delete(".,")
      @numbers << cleaned if cleaned =~ NUMERIC
    end
  end

  # The expected share of each leading digit: P(d) = log10(1 + 1/d).
  #
  # Returns a Hash from "1".."9" to a Float; computed once per instance.
  def law
    return @benford if @benford
    @benford = {}
    (1..9).each { |d| @benford[d.to_s] = Math.log10(1 + 1.0 / d) }
    @benford
  end

  # The number of samples per leading digit.
  #
  # Returns a Hash with a default of 0 for digits that never occur. Samples
  # whose value is zero are counted under "0".
  def counts
    return @digit_counts if @digit_counts
    @digit_counts = Hash.new(0)
    numbers.each { |num| @digit_counts[leading_digit(num)] += 1 }
    @digit_counts
  end

  # The share of samples per leading digit (count / number of samples).
  #
  # Returns a Hash with a default of 0.0; it is empty when no samples are
  # loaded, so no division by zero can occur.
  def distribution
    return @dist if @dist
    total = numbers.size.to_f
    @dist = Hash.new(0.0)
    counts.each { |digit, count| @dist[digit] = count / total }
    @dist
  end

  # The difference between the observed and the expected share for each of
  # the digits "1".."9" (positive means over-represented in the sample).
  def deviation
    return @variants if @variants
    @variants = Hash.new(0.0)
    law.each { |digit, expected| @variants[digit] = distribution[digit] - expected }
    @variants
  end

  private

  # First digit of the sample's integer value, ignoring the sign and any
  # leading zeros ("-0123" => "1").
  def leading_digit(num)
    num.to_i.abs.to_s[0, 1]
  end

end
52
+
53
+
# Helpers added to String for reading numeric samples.
#
# NOTE: these reopen a core class; String#first in particular will clash
# with any other library that defines a method of the same name.
class String

  # The first digit of the integer this string starts with, as a
  # one-character String. The sign and leading zeros are ignored
  # ("-0123" => "1"); a string with no leading integer gives "0".
  def first
    to_i.abs.to_s[0, 1]
  end

  # True when the whole string is an optionally signed integer or decimal
  # number ("42", "-7", "12.5"). Anchored with \z so that a trailing
  # newline does not slip through.
  def is_numeric?
    !(self =~ /\A[+-]?\d+(\.\d+)?\z/).nil?
  end
end
@@ -0,0 +1,65 @@
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Examples for Benford: which samples load! keeps, and the per-digit counts,
# distribution and deviation computed from them.
describe Benford do

  describe "after loading" do

    before(:each) do
      @b = Benford.new
    end

    it "counts the loaded numbers" do
      @b.load! ["6712", "1232"]
      @b.numbers.count.should == 2
    end

    it "counts floats" do
      @b.load! ["6712", "65123.1", "1232"]
      @b.numbers.count.should == 3
    end

    it "ignores non-numeric strings" do
      @b.load! ["6712345", "kjaadish", "2713678"]
      @b.numbers.count.should == 2
    end

    it "uses the first significant digit of a float below 1" do
      @b.load! ["0.123"]
      @b.numbers.first.first.should == "1" # first.first is confusing, but alas
    end

    it "keeps numbers that contain thousands separators" do
      @b.load! ["123,212.1"]
      @b.numbers.count.should == 1
    end

    it "gets the counts per leading digit right" do
      @b.load! ["18213", "2187356", "2131234", "2131234"]
      @b.counts["1"].should == 1
      @b.counts["2"].should == 3
    end

    it "gets the right distribution" do
      @b.load! ["18213", "1187356", "2131234", "2131234"]
      @b.distribution["1"].should == 0.5
      @b.distribution["2"].should == 0.5
    end

    it "gets the right distribution for an uneven split" do
      @b.load! ["111", "111", "111", "222"]
      @b.distribution["1"].should == 0.75
      @b.distribution["2"].should == 0.25
    end

    it "gets the right distribution for a single sample" do
      @b.load! ["1"]
      @b.distribution["1"].should == 1
    end

    it "gets the correct deviation from Benford's Law" do
      @b.load! ["111", "111", "222", "222"]
      @b.deviation["1"].should be_between 0.19, 0.2
      @b.deviation["2"].should be_between 0.32, 0.33
    end
  end
end
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'benford'
5
+
6
+ # Requires supporting files with custom matchers and macros, etc,
7
+ # in ./support/ and its subdirectories.
8
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
+
10
+ RSpec.configure do |config|
11
+
12
+ end
metadata ADDED
@@ -0,0 +1,139 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: benford
3
+ version: !ruby/object:Gem::Version
4
+ hash: 31
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 0
10
+ version: 0.0.0
11
+ platform: ruby
12
+ authors:
13
+ - Tijmen Brommet
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-09-20 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ type: :development
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ hash: 3
28
+ segments:
29
+ - 2
30
+ - 3
31
+ - 0
32
+ version: 2.3.0
33
+ prerelease: false
34
+ name: rspec
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ type: :development
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ hash: 23
44
+ segments:
45
+ - 1
46
+ - 0
47
+ - 0
48
+ version: 1.0.0
49
+ prerelease: false
50
+ name: bundler
51
+ version_requirements: *id002
52
+ - !ruby/object:Gem::Dependency
53
+ type: :development
54
+ requirement: &id003 !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ~>
58
+ - !ruby/object:Gem::Version
59
+ hash: 7
60
+ segments:
61
+ - 1
62
+ - 6
63
+ - 4
64
+ version: 1.6.4
65
+ prerelease: false
66
+ name: jeweler
67
+ version_requirements: *id003
68
+ - !ruby/object:Gem::Dependency
69
+ type: :development
70
+ requirement: &id004 !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ prerelease: false
80
+ name: rcov
81
+ version_requirements: *id004
82
+ description: "Check if a dataset conforms to Benford's Law. More info: http://en.wikipedia.org/wiki/Benford's_law"
83
+ email: tijmen@gmail.com
84
+ executables:
85
+ - benford
86
+ extensions: []
87
+
88
+ extra_rdoc_files:
89
+ - LICENSE.txt
90
+ - README.rdoc
91
+ files:
92
+ - .document
93
+ - .rspec
94
+ - Gemfile
95
+ - Gemfile.lock
96
+ - LICENSE.txt
97
+ - README.rdoc
98
+ - Rakefile
99
+ - VERSION
100
+ - benford.gemspec
101
+ - bin/benford
102
+ - lib/benford.rb
103
+ - spec/benford_spec.rb
104
+ - spec/spec_helper.rb
105
+ homepage: http://github.com/tijmenb/benford
106
+ licenses:
107
+ - MIT
108
+ post_install_message:
109
+ rdoc_options: []
110
+
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ hash: 3
119
+ segments:
120
+ - 0
121
+ version: "0"
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ">="
126
+ - !ruby/object:Gem::Version
127
+ hash: 3
128
+ segments:
129
+ - 0
130
+ version: "0"
131
+ requirements: []
132
+
133
+ rubyforge_project:
134
+ rubygems_version: 1.8.5
135
+ signing_key:
136
+ specification_version: 3
137
+ summary: Check the distribution if first digits in a dataset
138
+ test_files: []
139
+