genderstat 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9d948276334bd55efb359df9eb7b3fb86dbfc3c1
4
+ data.tar.gz: a462a1c66d02849eca80ade99bfc5eeec7fa7d6b
5
+ SHA512:
6
+ metadata.gz: 123eca8ae0ded0774bcdfbbd4fb1754f6db043449e910ffebda9f1bcd835f3cad5d9df97ebfb2a93b28b0cde98298a8a833f675b260530f24ba4401f59073ec1
7
+ data.tar.gz: 90d1988ad9bbb61896d08bf9fe31079e16064dfe5bdcad29c1f656904056dfc6abfde7715e81b01349892b2c435e185e30f5faff83e86f72b24e5f6fa67d16ee
@@ -0,0 +1,3 @@
1
+ *.txt
2
+ Gemfile.lock
3
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'open_uri_redirections'
@@ -0,0 +1,133 @@
1
+ genderstat
2
+ ===
3
+ genderstat calculates the amount (in terms of percentages and relative ratios) of gendered language in a file or on a site.
4
+
5
+ Specifically, it counts the number of gendered:
6
+ - subject pronouns (she/he)
7
+ - object pronouns (her/him)
8
+ - possessives (hers/his)
9
+ - generic nouns (woman/man, girl/boy, womankind/mankind, etc)
10
+
11
+ It reports these both as absolute counts and as percentages of the total word count.
12
+
13
+ Additionally, it offers the ratios between each pair of categories (e.g. 3.2 times as many
14
+ masculine words as feminine words).
15
+
16
+ Word Lists
17
+ ---
18
+ genderstat comes with three wordlists:
19
+ - ```masculine_words.yaml```
20
+ - ```feminine_words.yaml```
21
+ - ```neutral_words.yaml```
22
+
23
+ They're completely editable, and genderstat will calculate for any files in the
24
+ directory whose names end with ```_words.yaml```
25
+
26
+
27
+ Installation
28
+ -------
29
+
30
+ `gem install genderstat`
31
+
32
+ Usage
33
+ ---
34
+
35
+ `genderstat [FILE]`
36
+
37
+ or
38
+
39
+ `genderstat [URL]`
40
+
41
+
42
+ ### File example ###
43
+
44
+ The `ralph-waldo-emerson.txt` file is a collection of his essays and `kate-chopin.txt` is a collection of her stories (including The Awakening).
45
+ ```
46
+ % ruby -Ilib bin/genderstat ralph-waldo-emerson.txt
47
+ total words: 77181
48
+ feminine words: 85
49
+ masculine words: 1942
50
+ neutral words: 1714
51
+
52
+ feminine words: 0.11%
53
+ masculine words: 2.52%
54
+ neutral words: 2.22%
55
+
56
+ The ratio of feminine to masculine words is 0.04
57
+ The ratio of feminine to neutral words is 0.05
58
+ The ratio of masculine to feminine words is 22.85
59
+ The ratio of masculine to neutral words is 1.13
60
+ The ratio of neutral to feminine words is 20.16
61
+ The ratio of neutral to masculine words is 0.88
62
+
63
+
64
+ % ruby -Ilib bin/genderstat kate-chopin.txt
65
+ total words: 67023
66
+ feminine words: 3147
67
+ masculine words: 1724
68
+ neutral words: 1095
69
+
70
+ feminine words: 4.7%
71
+ masculine words: 2.57%
72
+ neutral words: 1.63%
73
+
74
+ The ratio of feminine to masculine words is 1.83
75
+ The ratio of feminine to neutral words is 2.87
76
+ The ratio of masculine to feminine words is 0.55
77
+ The ratio of masculine to neutral words is 1.57
78
+ The ratio of neutral to feminine words is 0.35
79
+ The ratio of neutral to masculine words is 0.64
80
+ ```
81
+
82
+ ### URL example ###
83
+ ```
84
+ % ruby -Ilib bin/genderstat feministing.com
85
+ total words: 4908
86
+ feminine words: 20
87
+ masculine words: 3
88
+ neutral words: 17
89
+
90
+ feminine words: 0.41%
91
+ masculine words: 0.06%
92
+ neutral words: 0.35%
93
+
94
+ The ratio of feminine to masculine words is 6.67
95
+ The ratio of feminine to neutral words is 1.18
96
+ The ratio of masculine to feminine words is 0.15
97
+ The ratio of masculine to neutral words is 0.18
98
+ The ratio of neutral to feminine words is 0.85
99
+ The ratio of neutral to masculine words is 5.67
100
+
101
+ % ruby -Ilib bin/genderstat stallman.org
102
+ total words: 5673
103
+ feminine words: 1
104
+ masculine words: 16
105
+ neutral words: 55
106
+
107
+ feminine words: 0.02%
108
+ masculine words: 0.28%
109
+ neutral words: 0.97%
110
+
111
+ The ratio of feminine to masculine words is 0.06
112
+ The ratio of feminine to neutral words is 0.02
113
+ The ratio of masculine to feminine words is 16.0
114
+ The ratio of masculine to neutral words is 0.29
115
+ The ratio of neutral to feminine words is 55.0
116
+ The ratio of neutral to masculine words is 3.44
117
+ ```
118
+ todo
119
+ ---
120
+ - Add tests
121
+ - Add support for more stats (like statistical significance?)
122
+ - Add support for reading from stdin
123
+ - Handle scenario of https->redirect (rather than weird nil error)
124
+
125
+ Dependencies
126
+ ------------
127
+ - Ruby 2.0+
128
+ - ```open_uri_redirections``` gem
129
+
130
+ You can get the gem by running ```bundle install```.
131
+
132
+ It's not strictly necessary; it just allows for HTTP->HTTPS redirections.
133
+
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby -I ../lib
2
+
3
+ require 'genderstat'
4
+
5
# The command line must carry exactly one argument: a file path or a URL.
abort("Usage: genderstat [FILE] or genderstat [URL]") unless ARGV.length == 1

# Read the input, count the words, then print every report section.
stats = Genderstat.new(ARGV[0])
stats.calculate
stats.print_all_results
@@ -0,0 +1,11 @@
1
+ - she
2
+ - her
3
+ - hers
4
+ - she's
5
+ - herself
6
+ - woman
7
+ - lady
8
+ - girl
9
+ - womankind
10
+ - women
11
+ - ladies
@@ -0,0 +1,17 @@
1
+ $:.push File.expand_path("../lib", __FILE__)
2
+ require "genderstat/version"
3
+
4
# Gem specification for genderstat.
Gem::Specification.new do |s|
  s.name        = 'genderstat'
  s.version     = Genderstat::VERSION
  s.date        = '2014-08-23'
  s.summary     = "A gendered language frequency calculator"
  s.description = "Calculate the relative frequencies of gendered language in a file or on the web"
  s.authors     = ["Sean Collins"]
  s.email       = 'sean@cllns.com'
  # Package everything tracked by git (this includes the *_words.yaml lists).
  s.files       = `git ls-files`.split($/)
  s.executables = ['genderstat']
  s.required_ruby_version = '>= 2.0'
  # The project is hosted on github.com (the previous github.org host was a typo).
  s.homepage    = 'https://github.com/cllns/genderstat'
  s.license     = 'MIT'

  # lib/genderstat/web_reader.rb requires this gem unconditionally, so it has
  # to be installed together with genderstat or the executable fails to load.
  s.add_runtime_dependency 'open_uri_redirections'
end
@@ -0,0 +1,48 @@
1
+ #! /usr/bin/ruby
2
+ # A program to determine how "masculine" a piece of writing is.
3
+ # Sean Collins 11/11/12
4
+ require 'yaml'
5
+ require 'genderstat/text_reader'
6
+ require 'genderstat/word_counters'
7
+
8
# Entry point of the library: reads the words of a file or URL, feeds them to
# the word counters and prints totals, percentages and ratios to stdout.
class Genderstat
  # Number of decimal places kept when percentages and ratios are rounded.
  DECIMAL_DIGITS_OF_PRECISION = 2

  # @param arg [String] a file path or a URL; TextReader decides which it is
  def initialize(arg)
    @word_counters = WordCounters.new
    @text_reader = TextReader.new(arg)
    # Start with an empty word list so the print_* methods can be called
    # before #calculate without failing on nil.
    @all_words = []
  end

  # Reads every word from the source and passes each one to the counters.
  #
  # @return [Array<String>] the words that were read
  def calculate
    @all_words = @text_reader.read
    @all_words.each { |word| @word_counters.check(word) }
  end

  # Prints the total word count followed by the three report sections.
  def print_all_results
    puts "total words: #{@all_words.count}"
    print_totals
    print_percentages
    print_ratios
  end

  # Prints the absolute number of matches per word list, then a blank line.
  def print_totals
    @word_counters.get_totals.each do |name, count|
      puts "#{name} words: #{count}"
    end
    puts
  end

  # Prints each word list's share of all words read, then a blank line.
  def print_percentages
    @word_counters.get_percentages(@all_words.count).each do |name, percentage|
      puts "#{name} words: #{percentage}%"
    end
    puts
  end

  # Prints the ratio between every ordered pair of word lists, then a blank
  # line. Ratio keys look like "a_to_b"; underscores are shown as spaces.
  def print_ratios
    @word_counters.get_ratios.each do |name, ratio|
      puts "The ratio of #{name.gsub("_", " ")} words is #{ratio}"
    end
    puts
  end
end
@@ -0,0 +1,9 @@
1
# Reads the full contents of a local file.
class FileReader
  # @param file_name [String] path of the file to read
  def initialize(file_name)
    @file_name = file_name
  end

  # @return [String] the entire content of the file
  def read
    # File.read is used instead of IO.read: IO.read treats a name starting
    # with "|" as a command to execute, File.read always opens the file.
    File.read(@file_name)
  end
end
@@ -0,0 +1,16 @@
1
+ require 'genderstat/file_reader'
2
+ require 'genderstat/web_reader'
3
+
4
# Picks the right reader for a locator and turns its text into a word list.
class TextReader
  # A locator that names an existing path is read with FileReader; anything
  # else is handed to WebReader.
  def initialize(file_locator)
    reader_class = File.exist?(file_locator) ? FileReader : WebReader
    @reader = reader_class.new(file_locator)
  end

  # Returns the source text lower-cased and split on whitespace.
  def read
    raw_text = @reader.read
    lowered = raw_text.downcase
    lowered.split
  end
end
@@ -0,0 +1,3 @@
1
# Holds the gem version.
#
# Genderstat is opened as a class here because the main library file declares
# `class Genderstat`; declaring it as a module in one file and a class in the
# other raises a TypeError as soon as both files are loaded together.
class Genderstat
  VERSION = '0.0.2'
end
@@ -0,0 +1,35 @@
1
+ require 'open-uri'
2
+ require 'open_uri_redirections'
3
+ require 'socket'
4
+
5
# Downloads the body of a web page.
class WebReader
  # Matches a leading "scheme://" such as "http://" or "https://".
  SCHEME_PREFIX = %r{\A[a-z][a-z0-9+.\-]*://}i

  # @param url [String] a URL, with or without a scheme
  def initialize(url)
    @url = clean_up_url(url)
  end

  # Fetches the page and returns its body. Any failure ends the program via
  # Kernel#abort with a message.
  #
  # @return [String] the response body
  def read
    # Open through the parsed URI rather than Kernel#open: Kernel#open no
    # longer accepts URLs on Ruby 3 and may run a command for "|..." input.
    # :allow_redirections is the option handled by the open_uri_redirections
    # gem that this file requires.
    URI.parse(@url).open(:allow_redirections => :safe).read
  rescue StandardError => ex
    # StandardError only, so interrupts and exit requests are not swallowed.
    handle_web_exceptions(ex)
  end

  private

  # Aborts with a short message for HTTP and socket failures, and with the
  # exception's own text for everything else.
  def handle_web_exceptions(ex)
    case ex
    when OpenURI::HTTPError, SocketError
      abort "Could not open: #{@url}"
    else
      abort ex.to_s
    end
  end

  # If the URL does not start with a scheme, prepend http:// to it.
  # A bare "host:port" has no "://", so it gets the prefix as well.
  def clean_up_url(url)
    url =~ SCHEME_PREFIX ? url : "http://#{url}"
  end
end
@@ -0,0 +1,15 @@
1
+ require 'set'
2
+
3
# Counts how many of the checked words appear in one YAML word list.
class WordCounter
  # count: number of matches recorded so far.
  # name:  label taken from the list's file name.
  attr_reader :count, :name

  # @param filename [String] path to a YAML file holding a list of words,
  #   e.g. ".../feminine_words.yaml"
  def initialize(filename)
    # An empty YAML file does not load as a list; treat it as "no words".
    @words = Set.new(YAML.load_file(filename) || [])
    # Derive the name from the file name only, so underscores in a parent
    # directory cannot affect it: "dir/feminine_words.yaml" => "feminine".
    @name = File.basename(filename, '.yaml').sub(/_words\z/, '')
    @count = 0
  end

  # Adds one to the count when the word is in this list.
  #
  # @param word_in_question [String] the word to look up
  # @return [Integer, nil] the new count on a match, nil otherwise
  def is_in_here?(word_in_question)
    @count += 1 if @words.include?(word_in_question)
  end
end
@@ -0,0 +1,53 @@
1
+ require 'genderstat/word_counter'
2
+
3
# Holds one WordCounter per word list and derives statistics from them.
class WordCounters
  # Builds a counter for every "*_words.yaml" file found two directories
  # above this file.
  def initialize
    # The word lists are two directories higher than this file, so that's
    # how we have to reference their locations
    this_file_path = File.dirname(__FILE__)
    word_list_location = File.join(this_file_path, "../../*_words.yaml")

    # Sort the matches so the report order does not depend on the order in
    # which the file system happens to list the files.
    @word_counters = Dir.glob(word_list_location).sort.map do |filename|
      WordCounter.new(filename)
    end
  end

  # Offers a word to every counter.
  def check(word)
    @word_counters.each { |wc| wc.is_in_here?(word) }
  end

  # @return [Hash] counter name => number of matches
  def get_totals
    @word_counters.each_with_object({}) do |wc, hash|
      hash[wc.name] = wc.count
    end
  end

  # @param total_word_count [Integer] number of words that were checked
  # @return [Hash] counter name => rounded percentage of total_word_count
  def get_percentages(total_word_count)
    @word_counters.each_with_object({}) do |wc, hash|
      hash[wc.name] = round(100 * (wc.count.to_f / total_word_count))
    end
  end

  # We get the ratios of counts among all word lists.
  # This returns a hash with keys that are named "name_to_other_name".
  # This loop does twice as much work as it needs to, since the ratios
  # are reciprocals of one another, but oh well! It's only division.
  def get_ratios
    @word_counters.each_with_object({}) do |wc, hash|
      # We skip over the ratio of name_to_name, since it'll always be one
      (@word_counters - [wc]).each do |other_wc|
        hash["#{wc.name}_to_#{other_wc.name}"] = round(wc.count.to_f / other_wc.count.to_f)
      end
    end
  end

  # Rounds to the number of decimal places configured on Genderstat.
  def round(float)
    float.round(Genderstat::DECIMAL_DIGITS_OF_PRECISION)
  end
end
@@ -0,0 +1,11 @@
1
+ - he
2
+ - him
3
+ - his
4
+ - he's
5
+ - himself
6
+ - man
7
+ - dude
8
+ - boy
9
+ - mankind
10
+ - men
11
+ - dudes
@@ -0,0 +1,9 @@
1
+ - it
2
+ - it's
3
+ - its
4
+ - they
5
+ - their
6
+ - someone
7
+ - anyone
8
+ - somebody
9
+ - someone
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: genderstat
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Sean Collins
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-23 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Calculate the relative frequencies of gendered language in a file or
14
+ on the web
15
+ email: sean@cllns.com
16
+ executables:
17
+ - genderstat
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ".gitignore"
22
+ - Gemfile
23
+ - README.md
24
+ - bin/genderstat
25
+ - feminine_words.yaml
26
+ - genderstat.gemspec
27
+ - lib/genderstat.rb
28
+ - lib/genderstat/file_reader.rb
29
+ - lib/genderstat/text_reader.rb
30
+ - lib/genderstat/version.rb
31
+ - lib/genderstat/web_reader.rb
32
+ - lib/genderstat/word_counter.rb
33
+ - lib/genderstat/word_counters.rb
34
+ - license.txt
35
+ - masculine_words.yaml
36
+ - neutral_words.yaml
37
+ homepage: http://github.org/cllns/genderstat
38
+ licenses:
39
+ - MIT
40
+ metadata: {}
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '2.0'
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ requirements: []
56
+ rubyforge_project:
57
+ rubygems_version: 2.2.0
58
+ signing_key:
59
+ specification_version: 4
60
+ summary: A gendered language frequency calculator
61
+ test_files: []