genderstat 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9d948276334bd55efb359df9eb7b3fb86dbfc3c1
4
+ data.tar.gz: a462a1c66d02849eca80ade99bfc5eeec7fa7d6b
5
+ SHA512:
6
+ metadata.gz: 123eca8ae0ded0774bcdfbbd4fb1754f6db043449e910ffebda9f1bcd835f3cad5d9df97ebfb2a93b28b0cde98298a8a833f675b260530f24ba4401f59073ec1
7
+ data.tar.gz: 90d1988ad9bbb61896d08bf9fe31079e16064dfe5bdcad29c1f656904056dfc6abfde7715e81b01349892b2c435e185e30f5faff83e86f72b24e5f6fa67d16ee
@@ -0,0 +1,3 @@
1
+ *.txt
2
+ Gemfile.lock
3
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'open_uri_redirections'
@@ -0,0 +1,133 @@
1
+ genderstat
2
+ ===
3
+ genderstat calculates the amount (in terms of percentages and relative ratios) of gendered language in a file or on a site.
4
+
5
+ Specifically, it counts the number of gendered:
6
+ - subject pronouns (she/he)
7
+ - object pronouns (her/him)
8
+ - possessives (hers/his)
9
+ - generic nouns (woman/man, girl/boy, womankind/mankind, etc)
10
+
11
+ It reports both the absolute counts and each count as a percentage of the total number of words.
12
+
13
+ Additionally, it reports the ratio between each pair of word lists (e.g. 3.2 times as many
14
+ masculine words as feminine words).
15
+
16
+ Word Lists
17
+ ---
18
+ genderstat comes with three wordlists:
19
+ - ```masculine_words.yaml```
20
+ - ```feminine_words.yaml```
21
+ - ```neutral_words.yaml```
22
+
23
+ They're completely editable, and genderstat will calculate statistics for any file in the
24
+ directory whose name ends with ```_words.yaml```
25
+
26
+
27
+ Installation
28
+ -------
29
+
30
+ `gem install genderstat`
31
+
32
+ Usage
33
+ ---
34
+
35
+ `genderstat [FILE]`
36
+
37
+ or
38
+
39
+ `genderstat [URL]`
40
+
41
+
42
+ ### File example ###
43
+
44
+ The `ralph-waldo-emerson.txt` file is a collection of his essays and `kate-chopin.txt` is a collection of her stories (including The Awakening).
45
+ ```
46
+ % ruby -Ilib bin/genderstat ralph-waldo-emerson.txt
47
+ total words: 77181
48
+ feminine words: 85
49
+ masculine words: 1942
50
+ neutral words: 1714
51
+
52
+ feminine words: 0.11%
53
+ masculine words: 2.52%
54
+ neutral words: 2.22%
55
+
56
+ The ratio of feminine to masculine words is 0.04
57
+ The ratio of feminine to neutral words is 0.05
58
+ The ratio of masculine to feminine words is 22.85
59
+ The ratio of masculine to neutral words is 1.13
60
+ The ratio of neutral to feminine words is 20.16
61
+ The ratio of neutral to masculine words is 0.88
62
+
63
+
64
+ % ruby -Ilib bin/genderstat kate-chopin.txt
65
+ total words: 67023
66
+ feminine words: 3147
67
+ masculine words: 1724
68
+ neutral words: 1095
69
+
70
+ feminine words: 4.7%
71
+ masculine words: 2.57%
72
+ neutral words: 1.63%
73
+
74
+ The ratio of feminine to masculine words is 1.83
75
+ The ratio of feminine to neutral words is 2.87
76
+ The ratio of masculine to feminine words is 0.55
77
+ The ratio of masculine to neutral words is 1.57
78
+ The ratio of neutral to feminine words is 0.35
79
+ The ratio of neutral to masculine words is 0.64
80
+ ```
81
+
82
+ ### URL example ###
83
+ ```
84
+ % ruby -Ilib bin/genderstat feministing.com
85
+ total words: 4908
86
+ feminine words: 20
87
+ masculine words: 3
88
+ neutral words: 17
89
+
90
+ feminine words: 0.41%
91
+ masculine words: 0.06%
92
+ neutral words: 0.35%
93
+
94
+ The ratio of feminine to masculine words is 6.67
95
+ The ratio of feminine to neutral words is 1.18
96
+ The ratio of masculine to feminine words is 0.15
97
+ The ratio of masculine to neutral words is 0.18
98
+ The ratio of neutral to feminine words is 0.85
99
+ The ratio of neutral to masculine words is 5.67
100
+
101
+ % ruby -Ilib bin/genderstat stallman.org
102
+ total words: 5673
103
+ feminine words: 1
104
+ masculine words: 16
105
+ neutral words: 55
106
+
107
+ feminine words: 0.02%
108
+ masculine words: 0.28%
109
+ neutral words: 0.97%
110
+
111
+ The ratio of feminine to masculine words is 0.06
112
+ The ratio of feminine to neutral words is 0.02
113
+ The ratio of masculine to feminine words is 16.0
114
+ The ratio of masculine to neutral words is 0.29
115
+ The ratio of neutral to feminine words is 55.0
116
+ The ratio of neutral to masculine words is 3.44
117
+ ```
118
+ todo
119
+ ---
120
+ - Add tests
121
+ - Add support for more stats (like statistical significance?)
122
+ - Add support for reading from stdin
123
+ - Handle scenario of https->redirect (rather than weird nil error)
124
+
125
+ Dependencies
126
+ ------------
127
+ - Ruby 2.0+
128
+ - ```open_uri_redirections``` gem
129
+
130
+ You can get the gem by running ```bundle install```.
131
+
132
+ It is required by `lib/genderstat/web_reader.rb`, where it allows HTTP->HTTPS redirections.
133
+
@@ -0,0 +1,11 @@
1
#!/usr/bin/env ruby
# Command-line entry point for genderstat.
#
# Usage: genderstat [FILE] or genderstat [URL]
#
# Takes exactly one argument, hands it to Genderstat, runs the calculation
# and prints every report to standard output.

# Locate lib/ relative to this script instead of passing `-I ../lib` on the
# shebang line: `env` on Linux treats everything after the interpreter name as
# a single argument, and `../lib` would be resolved against the current
# working directory rather than against this file.
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)

require 'genderstat'

# Exactly one FILE or URL must be supplied; anything else prints the usage
# line to stderr and exits with a failure status.
abort("Usage: genderstat [FILE] or genderstat [URL]") if ARGV.length != 1

genderstat = Genderstat.new(ARGV[0])
genderstat.calculate
genderstat.print_all_results
@@ -0,0 +1,11 @@
1
+ - she
2
+ - her
3
+ - hers
4
+ - she's
5
+ - herself
6
+ - woman
7
+ - lady
8
+ - girl
9
+ - womankind
10
+ - women
11
+ - ladies
@@ -0,0 +1,17 @@
1
# Gem specification for genderstat.
#
# lib/ is pushed onto the load path so that the version constant can be read
# from genderstat/version before the gem is installed.
$:.push File.expand_path("../lib", __FILE__)
require "genderstat/version"

Gem::Specification.new do |s|
  s.name        = 'genderstat'
  s.version     = Genderstat::VERSION
  s.date        = '2014-08-23'
  s.summary     = "A gendered language frequency calculator"
  s.description = "Calculate the relative frequencies of gendered language in a file or on the web"
  s.authors     = ["Sean Collins"]
  s.email       = 'sean@cllns.com'
  # Package every file tracked by git (requires building from a git checkout).
  s.files       = `git ls-files`.split($/)
  s.executables = ['genderstat']
  s.required_ruby_version = '>= 2.0'
  s.homepage    = 'https://github.com/cllns/genderstat'
  s.license     = 'MIT'

  # lib/genderstat/web_reader.rb requires this gem unconditionally, so it must
  # be installed together with genderstat or the executable fails to load.
  s.add_runtime_dependency 'open_uri_redirections'
end
@@ -0,0 +1,48 @@
1
+ #! /usr/bin/ruby
2
+ # A program to determine how "masculine" a piece of writing is.
3
+ # Sean Collins 11/11/12
4
+ require 'yaml'
5
+ require 'genderstat/text_reader'
6
+ require 'genderstat/word_counters'
7
+
8
# Counts gendered words in a file or web page and prints the totals,
# percentages and pairwise ratios for every word list.
#
# Typical use:
#
#   genderstat = Genderstat.new(file_or_url)
#   genderstat.calculate
#   genderstat.print_all_results
class Genderstat
  # Number of decimal places kept when rounding percentages and ratios
  # (read by WordCounters#round).
  DECIMAL_DIGITS_OF_PRECISION = 2

  # arg - a String naming a local file or a URL; it is passed unchanged to
  #       TextReader.
  def initialize(arg)
    @word_counters = WordCounters.new
    @text_reader = TextReader.new(arg)
    @all_words = nil
  end

  # Reads the text and feeds every word to the word counters.
  #
  # The work is done only once: the counters accumulate, so running the loop
  # a second time would double every total.
  #
  # Returns the Array of words that were read.
  def calculate
    return @all_words if @all_words

    @all_words = @text_reader.read
    @all_words.each { |word| @word_counters.check(word) }
  end

  # Prints the total word count followed by the totals, percentages and
  # ratios reports. Runs the calculation first if it has not been run yet.
  def print_all_results
    calculate
    puts "total words: #{@all_words.count}"
    print_totals
    print_percentages
    print_ratios
  end

  # Prints one "<name> words: <count>" line per word list, then a blank line.
  def print_totals
    calculate
    @word_counters.get_totals.each do |name, count|
      puts "#{name} words: #{count}"
    end
    puts
  end

  # Prints one "<name> words: <percentage>%" line per word list, then a blank
  # line. Percentages are relative to the total number of words read.
  def print_percentages
    calculate
    @word_counters.get_percentages(@all_words.count).each do |name, percentage|
      puts "#{name} words: #{percentage}%"
    end
    puts
  end

  # Prints one line per ordered pair of word lists, then a blank line.
  # Ratio keys look like "a_to_b"; underscores become spaces for display.
  def print_ratios
    calculate
    @word_counters.get_ratios.each do |name, ratio|
      puts "The ratio of #{name.gsub("_", " ")} words is #{ratio}"
    end
    puts
  end
end
@@ -0,0 +1,9 @@
1
# Reads the complete contents of a local file.
class FileReader
  # file_name - path of the file to read.
  def initialize(file_name)
    @file_name = file_name
  end

  # Returns the file's contents as a String.
  #
  # File.read is used rather than IO.read because IO.read treats a name that
  # starts with "|" as a command to run, and the name comes straight from
  # the command line.
  #
  # Raises Errno::ENOENT if the file does not exist.
  def read
    File.read(@file_name)
  end
end
@@ -0,0 +1,16 @@
1
+ require 'genderstat/file_reader'
2
+ require 'genderstat/web_reader'
3
+
4
# Reads text from a local file or a URL and splits it into lower-case words.
class TextReader
  # Punctuation or symbols attached to the start or end of a token.
  # Characters inside the token (the apostrophe in "she's") are untouched.
  EDGE_PUNCTUATION = /\A[^[:alnum:]]+|[^[:alnum:]]+\z/

  # file_locator - a String. If it names an existing local file it is read
  #                with FileReader; otherwise it is handed to WebReader.
  def initialize(file_locator)
    @reader =
      if File.exist?(file_locator)
        FileReader.new(file_locator)
      else
        WebReader.new(file_locator)
      end
  end

  # Returns an Array of lower-cased words.
  #
  # The text is split on whitespace and leading/trailing punctuation is
  # removed from each token, so that "he," and "(she)" match the word-list
  # entries "he" and "she". Tokens that contain no letters or digits at all
  # (for example "--") are dropped because they are not words.
  def read
    @reader.read.downcase.split
           .map { |token| token.gsub(EDGE_PUNCTUATION, '') }
           .reject(&:empty?)
  end
end
@@ -0,0 +1,3 @@
1
# Genderstat is declared as a class in lib/genderstat.rb, so it has to be
# reopened as a class here as well. Declaring it as a module in one file and
# as a class in the other raises TypeError as soon as both files are loaded.
class Genderstat
  # Released gem version.
  VERSION = '0.0.2'
end
@@ -0,0 +1,35 @@
1
+ require 'open-uri'
2
+ require 'open_uri_redirections'
3
+ require 'socket'
4
+
5
# Downloads the body of a web page.
class WebReader
  # Matches a string that starts with an explicit "scheme://".
  SCHEME_PREFIX = %r{\A[a-z][a-z0-9+.\-]*://}i

  # url - address of the page; "http://" is prepended when it has no scheme.
  def initialize(url)
    @url = clean_up_url(url)
  end

  # Returns the response body as a String.
  #
  # Any StandardError raised while fetching ends the program through
  # handle_web_exceptions. Only StandardError is rescued, so Interrupt and
  # SystemExit propagate normally.
  def read
    fetch.read
  rescue StandardError => ex
    handle_web_exceptions(ex)
  end

  private

  # Opens @url through open-uri. The :allow_redirections option comes from the
  # open_uri_redirections gem, which this file requires at the top.
  def fetch
    if URI.respond_to?(:open)
      URI.open(@url, :allow_redirections => :safe)
    else
      # Older Rubies have no public URI.open; there open-uri patches
      # Kernel#open instead.
      open(@url, :allow_redirections => :safe)
    end
  end

  # Aborts the program with a message describing the failure.
  #
  # HTTP and socket errors get a short "Could not open" message naming the
  # URL; anything else is reported with the exception's own message.
  def handle_web_exceptions(ex)
    case ex
    when OpenURI::HTTPError, SocketError
      abort "Could not open: #{@url}"
    else
      abort ex.to_s
    end
  end

  # If the URL has no explicit scheme, prepend it with http://
  def clean_up_url(url)
    url =~ SCHEME_PREFIX ? url : "http://#{url}"
  end
end
@@ -0,0 +1,15 @@
1
+ require 'set'
2
+
3
# Counts how many checked words belong to one YAML word list.
class WordCounter
  # count - number of checked words found in the list so far.
  # name  - label of the list, taken from its file name.
  attr_reader :count, :name

  # filename - path to a YAML file holding a list of words, named like
  #            "<name>_words.yaml".
  def initialize(filename)
    # An empty YAML file loads as a non-list value; treat it as "no words".
    @words = Set.new(YAML.load_file(filename) || [])
    # Take the label from the base name only, so an underscore in a directory
    # name cannot be mistaken for the "<name>_words" separator.
    @name = File.basename(filename).split('_').first
    @count = 0
  end

  # Increments the count if word_in_question is in this list.
  #
  # Returns the new count when the word matched, nil otherwise.
  def is_in_here?(word_in_question)
    @count += 1 if @words.include?(word_in_question)
  end
end
@@ -0,0 +1,53 @@
1
+ require 'genderstat/word_counter'
2
+
3
# Holds one WordCounter per bundled word list and derives the totals,
# percentages and pairwise ratios from their counts.
class WordCounters
  # Builds a WordCounter for every "*_words.yaml" file found two directories
  # above this file. The paths are sorted so the reports always list the word
  # lists in the same order (Dir.glob does not guarantee one on every Ruby).
  def initialize
    this_file_path = File.dirname(__FILE__)
    word_list_location = File.join(this_file_path, "../../*_words.yaml")

    @word_counters = Dir.glob(word_list_location).sort.map do |filename|
      WordCounter.new(filename)
    end
  end

  # Offers word to every counter; each one counts it if it is in its list.
  def check(word)
    @word_counters.each { |wc| wc.is_in_here?(word) }
  end

  # Returns a Hash of list name => number of matching words.
  def get_totals
    @word_counters.each_with_object({}) do |wc, hash|
      hash[wc.name] = wc.count
    end
  end

  # Returns a Hash of list name => percentage of total_word_count, rounded.
  #
  # total_word_count - number of words in the whole text. The division is done
  #                    in floating point, so a total of 0 produces NaN rather
  #                    than raising.
  def get_percentages(total_word_count)
    @word_counters.each_with_object({}) do |wc, hash|
      hash[wc.name] = round(100 * (wc.count.to_f / total_word_count))
    end
  end

  # Returns a Hash of "name_to_other_name" => rounded ratio of the two counts,
  # for every ordered pair of distinct word lists.
  #
  # Each pair is computed in both directions even though the two ratios are
  # reciprocals; it is only a division. Floating-point division means a zero
  # count in the denominator yields Infinity (or NaN for 0/0), not an error.
  def get_ratios
    @word_counters.each_with_object({}) do |wc, hash|
      @word_counters.each do |other_wc|
        # Skip the ratio of a list to itself, since it would always be one.
        next if other_wc.equal?(wc)

        ratio = round(wc.count.to_f / other_wc.count.to_f)
        hash["#{wc.name}_to_#{other_wc.name}"] = ratio
      end
    end
  end

  # Rounds float to Genderstat::DECIMAL_DIGITS_OF_PRECISION decimal places.
  def round(float)
    float.round(Genderstat::DECIMAL_DIGITS_OF_PRECISION)
  end
end
@@ -0,0 +1,11 @@
1
+ - he
2
+ - him
3
+ - his
4
+ - he's
5
+ - himself
6
+ - man
7
+ - dude
8
+ - boy
9
+ - mankind
10
+ - men
11
+ - dudes
@@ -0,0 +1,9 @@
1
+ - it
2
+ - it's
3
+ - its
4
+ - they
5
+ - their
6
+ - someone
7
+ - anyone
8
+ - somebody
9
+ - anybody
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: genderstat
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Sean Collins
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-23 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Calculate the relative frequencies of gendered language in a file or
14
+ on the web
15
+ email: sean@cllns.com
16
+ executables:
17
+ - genderstat
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ".gitignore"
22
+ - Gemfile
23
+ - README.md
24
+ - bin/genderstat
25
+ - feminine_words.yaml
26
+ - genderstat.gemspec
27
+ - lib/genderstat.rb
28
+ - lib/genderstat/file_reader.rb
29
+ - lib/genderstat/text_reader.rb
30
+ - lib/genderstat/version.rb
31
+ - lib/genderstat/web_reader.rb
32
+ - lib/genderstat/word_counter.rb
33
+ - lib/genderstat/word_counters.rb
34
+ - license.txt
35
+ - masculine_words.yaml
36
+ - neutral_words.yaml
37
+ homepage: http://github.org/cllns/genderstat
38
+ licenses:
39
+ - MIT
40
+ metadata: {}
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '2.0'
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ requirements: []
56
+ rubyforge_project:
57
+ rubygems_version: 2.2.0
58
+ signing_key:
59
+ specification_version: 4
60
+ summary: A gendered language frequency calculator
61
+ test_files: []