twigg 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,137 @@
1
+ require 'forwardable'
2
+ require 'set'
3
+
4
module Twigg
  # A collection of commits plus aggregate statistics and groupings (by day,
  # repo, author and team) computed over them.
  #
  # Aggregates such as {#additions} and {#authors} are memoized on first use;
  # they are not recomputed if more commits are appended afterwards with `<<`.
  class CommitSet
    extend Forwardable
    def_delegators :commits, :any?, :count, :each, :inject, :<<
    attr_reader :commits

    # `commits` is the Array backing the set. It is stored as given (not
    # copied), so `<<` appends to that same Array.
    def initialize(commits = [])
      @commits = commits
    end

    # Returns the sum of the `:additions` stat over all commits.
    def additions
      @additions ||= inject(0) { |memo, commit| memo + commit.stat[:additions] }
    end

    # Returns the sum of the `:deletions` stat over all commits.
    def deletions
      @deletions ||= inject(0) { |memo, commit| memo + commit.stat[:deletions] }
    end

    # Returns the mean of the per-commit Flesch Reading Ease scores, or 0 for an
    # empty set (which would otherwise divide by zero).
    def flesch_reading_ease
      return 0 if count.zero?

      @flesch_reading_ease ||=
        inject(0) { |memo, commit| memo + commit.flesch_reading_ease } / count
    end

    # Returns the sum of the per-commit "Russianness" scores.
    def russianness
      @russianness ||= inject(0) { |memo, commit| memo + commit.russianness }
    end

    # Returns one `{ date:, count: }` Hash per day, oldest first, covering the
    # range from `days` days ago up to and including today.
    def count_by_day(days)
      today = Date.today
      date_to_commits = @commits.group_by { |commit| commit.date }

      ((today - days)..today).map do |date|
        { date: date, count: date_to_commits.fetch(date, []).count }
      end
    end

    # Returns a new set containing the commits of the receiver followed by
    # those of `commit_set`, without duplicates.
    #
    # Neither operand is modified, and the result starts with no memoized
    # aggregates, so its statistics reflect the merged commits.
    #
    # Raises a TypeError unless `commit_set` is a CommitSet.
    def +(commit_set)
      unless commit_set.is_a?(CommitSet)
        raise TypeError, "expected Twigg::CommitSet, got #{commit_set.class}"
      end

      self.class.new((commits + commit_set.commits).uniq)
    end

    # Returns an Array of `{ repo:, count: }` Hashes, highest count first.
    def count_by_repo
      counts = Hash.new(0)
      each { |commit| counts[commit.repo] += 1 }
      counts.sort_by { |_repo, count| -count }.
        map { |repo, count| { repo: repo, count: count } }
    end

    # Returns a new set holding only the commits that list `author` among
    # their author names.
    def select_author(author)
      commits_for_author = @commits.select do |commit|
        commit.author_names.include?(author)
      end

      self.class.new(commits_for_author)
    end

    # Returns a new set holding only the commits with at least one author who
    # is listed under `team` in `Config.teams`.
    def select_team(team)
      members = Set.new(Config.teams[team])

      commits_for_team = @commits.select do |commit|
        commit.author_names.any? { |author| members.include?(author) }
      end

      self.class.new(commits_for_team)
    end

    # Returns an Array of `{ author:, commit_set: }` Hashes, one per author
    # name, ordered by descending commit count.
    def authors
      @authors ||= author_to_commit_set.
        sort_by { |_author, commit_set| -commit_set.count }.
        map { |author, commit_set| { author: author, commit_set: commit_set } }
    end

    # Returns a sparse pairing "matrix".
    #
    # Keys are pairer names. Values are hashes of pairees-to-count maps.
    def pairs
      PairMatrix.new(self)
    end

    # Returns an Array of `{ author:, commit_set:, authors: }` Hashes, one per
    # team in `Config.teams` that has at least one commit, ordered by
    # descending commit count.
    #
    # Authors that were not claimed by any team are gathered into a trailing
    # entry named `Team::OTHER_TEAM_NAME`. An author is removed from the pool
    # once claimed, so a name listed under several teams only counts towards
    # the first of them.
    def teams
      unassigned = author_to_commit_set

      teams = Config.teams.each_pair.map do |team, members|
        # `+` returns a new set, so the merged result is threaded through
        # `inject` rather than accumulated by mutating a memo object.
        commits = members.inject(self.class.new) do |commit_set, member|
          member_commits = unassigned.delete(member)
          member_commits ? commit_set + member_commits : commit_set
        end

        next unless commits.any?

        {
          author:     team.to_s,
          commit_set: commits,
          authors:    members,
        }
      end.compact.sort_by { |team| -team[:commit_set].count }

      unless unassigned.empty?
        teams << {
          author:     Team::OTHER_TEAM_NAME,
          commit_set: unassigned.values.inject(self.class.new, :+),
          authors:    unassigned.keys,
        }
      end

      teams
    end

  private

    # Returns a Hash mapping each author name to a set of the commits that
    # list that name. A commit with several authors appears under each.
    def author_to_commit_set
      Hash.new { |h, k| h[k] = self.class.new }.tap do |set|
        each do |commit|
          commit.author_names.each { |author_name| set[author_name] << commit }
        end
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
+ require 'forwardable'
2
+ require 'shellwords'
3
+ require 'yaml'
4
+
5
module Twigg
  # The Config class mediates all access to the Twigg config file.
  #
  # The config is a YAML file. The first of these locations that is specified
  # wins:
  #
  #   1. the path given with a `-c`/`--config` option in ARGV
  #   2. the path in the TWIGGRC environment variable
  #   3. `~/.twiggrc` (if it does not exist, an empty config is used)
  #
  # Example use:
  #
  #   Config.bind                     # the bind address for the Twigg web app
  #                                   # [default: 0.0.0.0]
  #   Config.gerrit.host              # the (optional) Gerrit hostname
  #                                   # [default: localhost]
  #   Config.gerrit.port              # the (optional) Gerrit port
  #                                   # [default: 29418]
  #   Config.gerrit.user              # the (optional) Gerrit username
  #                                   # [default: $USER environment variable]
  #   Config.repositories_directory   # where to find repositories
  #
  class Config
    include Console

    class << self
      # For convenience, forward all messages to the underlying {Config}
      # instance. This allows us to write things like `Config.bind` instead of
      # the more verbose `Config.config.bind`.
      #
      # The instance-level `method_missing` is private, so it is invoked with
      # `__send__` rather than through a Forwardable delegator (which warns
      # when it has to forward to a private method).
      def method_missing(method, *args, &block)
        config.__send__(:method_missing, method, *args, &block)
      end

      # Keeps `respond_to?` and `method` consistent with the forwarding above.
      def respond_to_missing?(method, include_private = false)
        config.respond_to?(method, include_private) || super
      end

    private

      # Maintain a "singleton" Config instance for convenient access.
      def config
        @config ||= new
      end
    end

    def initialize
      @settings = Settings.new(config_from_argv ||
                               config_from_env ||
                               config_from_home)
    end

  private

    # Forward all messages to the underlying {Settings} instance.
    def method_missing(method, *args, &block)
      @settings.send(method, *args, &block)
    end

    # Reports the messages that the underlying {Settings} instance answers.
    def respond_to_missing?(method, include_private = false)
      @settings.respond_to?(method, include_private) || super
    end

    # Loads the YAML file at `path` and returns its contents.
    #
    # After a successful load, prints a warning to standard error if the file
    # is world-readable.
    def config_from_file(path)
      YAML.load_file(path).tap do
        if File.world_readable?(path)
          warn "#{path} is world-readable"
          stderr strip_heredoc(<<-DOC)

            The Twigg config file may contain sensitive information, such as
            access credentials for external services.

            Suggested action: tighten the filesystem permissions with:

            chmod 600 #{Shellwords.escape path}

          DOC
        end
      end
    end

    # Returns the config from the file named by a `-c`/`--config` option, or
    # `nil` if ARGV has no such option. The option is removed from ARGV.
    def config_from_argv
      # It is a bit of a smell to have the Config class know about argument
      # processing, but, at least in development, Bundler will end up eagerly
      # loading the config when it evaluates the Gemfile (and hence the
      # twigg-app.gemspec), which means that this happens before the
      # Twigg::Command.run method gets a chance to set things up properly.
      path = consume_option(%w[-c --config], ARGV)
      config_from_file(path) if path
    end

    # Returns the config from the file named by TWIGGRC, or `nil` if unset.
    def config_from_env
      config_from_file(ENV['TWIGGRC']) if ENV['TWIGGRC']
    end

    TWIGGRC = '.twiggrc'.freeze

    # Returns the config from `~/.twiggrc`, or an empty Hash if that file does
    # not exist.
    def config_from_home
      config_from_file(File.join(Dir.home, TWIGGRC))
    rescue Errno::ENOENT
      {} # no custom config; assume defaults
    end
  end
end
@@ -0,0 +1,68 @@
1
module Twigg
  # A collection of useful methods for code that is running in a console.
  #
  # Functionality includes printing, process lifecycle management and
  # formatting.
  #
  # The methods are private, both when the module is mixed in and on the
  # module itself (via `extend self`); only `Console.die` is made public.
  module Console
    extend self

  private

    # Print `msgs` to standard error
    def stderr(*msgs)
      STDERR.puts(*msgs)
    end

    # Exit with an exit code of 1, printing the optional `msg`, prefixed with
    # "error: ", to standard error if present
    def die(msg = nil)
      error(msg) if msg
      exit 1
    end

    # Print `msg` to the standard error, prefixed with "error: "
    def error(msg)
      stderr("error: #{msg}")
    end

    # Print `msg` to the standard error, prefixed with "warning: "
    def warn(msg)
      stderr "warning: #{msg}"
    end

    # Given a "heredoc" `doc`, find the non-empty line with the smallest indent,
    # and strip that amount of whitespace from the beginning of each line.
    #
    # This allows us to write nicely indented copy that sits well with the
    # surrounding code, irrespective of the level of indentation of the code,
    # without emitting excessive whitespace to the user at runtime.
    def strip_heredoc(doc)
      indent = doc.scan(/^[ \t]*(?=\S)/).map(&:size).min || 0
      doc.gsub(/^[ \t]{#{indent}}/, '')
    end

    # Given `switches` (which may be either a single switch or an array of
    # switches) and an array of arguments, `args`, scans through the arguments
    # looking for the switches and the corresponding values.
    #
    # This can be used, for example, to extract the value "/etc/twiggrc" from an
    # argument list like "--verbose --debug --config /etc/twiggrc help".
    #
    # In the event that the switches appear multiple times in the list, the
    # right-most wins. If a switch is found without a corresponding option an
    # ArgumentError is raised.
    #
    # Consumes matching options (ie. deletes them from `args`) and returns the
    # corresponding (rightmost) value, or `nil` in the event there is no match.
    def consume_option(switches, args)
      switches = Array(switches) # normalize once, not once per argument
      value = nil

      # consume from left to right; rightmost will win
      while (index = args.find_index { |arg| switches.include?(arg) })
        switch, value = args.slice!(index, 2)
        raise ArgumentError, "missing option (expected after #{switch})" unless value
      end

      value
    end
  end

  Console.public_class_method :die
end
@@ -0,0 +1,12 @@
1
module Twigg
  # Mixin for code that needs an optional gem at runtime.
  module Dependency
  private

    # Requires `gem`, then yields and returns the block's value.
    #
    # If `gem` cannot be loaded, hands a message suggesting
    # `gem install <gem>` to `Console.die`.
    #
    # Only the `require` is guarded: a LoadError raised by the block itself
    # propagates to the caller instead of being reported as a missing `gem`.
    def with_dependency(gem)
      begin
        require gem
      rescue LoadError => e
        Console.die "#{e}: try `gem install #{gem}`"
      end

      yield
    end
  end
end
@@ -0,0 +1,65 @@
1
module Twigg
  # Class which computes an approximation of the Flesch Reading Ease metric for
  # a given piece of English-language text.
  #
  # @see {http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests}
  class Flesch
    def initialize(string)
      @string = string
    end

    # Returns the reading ease score as a Float.
    #
    # Text for which the score is undefined (no words, so the ratios are 0/0)
    # scores 206.835, the constant term of the formula.
    def reading_ease
      # from wikipedia:
      ease = 206.835 -
        1.015 * (total_words / total_sentences.to_f) -
        84.6 * (total_syllables / total_words.to_f)

      # beware NaN values (usually caused by empty commit messages),
      # incompatible with JSON
      ease.nan? ? 206.835 : ease
    end

  private

    # Returns approximate count of words in the receiver.
    def total_words
      words.size
    end

    # Returns an array of "words" in the receiver. "Words" are defined as
    # strings of consecutive "word" characters (as defined by the regex
    # short-hand, `\w`).
    def words
      @words ||= @string.split(/\b/).select { |w| w =~ /\w/ }
    end

    # Returns approximate total count of sentences in the receiver: the number
    # of pieces left after splitting on runs of full stops.
    def total_sentences
      @string.split(/\.+/).size
    end

    # Returns approximate total count of syllables in the receiver.
    def total_syllables
      words.inject(0) { |memo, word| memo + syllables(word) }
    end

    # Returns an approximate syllable count for `word`.
    #
    # Based on: {http://stackoverflow.com/questions/1271918/ruby-count-syllables}
    def syllables(word)
      # words of 3 letters or less count as 1 syllable; rare exceptions (eg.
      # "ion") are not handled
      return 1 if word.size <= 3

      # - ignore final es, ed, e (except for le)
      # - consecutive vowels count as one syllable
      word.
        downcase.
        gsub(/\W+/, ' ').     # suppress punctuation
        sub(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, '').
        sub(/^y/, '').
        scan(/[aeiouy]{1,2}/).
        size
    end
  end
end
@@ -0,0 +1,15 @@
1
module Twigg
  # Collects commits across all of the repositories in a directory.
  module Gatherer
    # Returns a {CommitSet} holding, for every repo that {RepoSet} finds in
    # `repositories_directory`, the commits that the repo reports since
    # `days` days before now.
    def self.gather(repositories_directory, days)
      cutoff   = Time.now - days * 24 * 60 * 60
      gathered = CommitSet.new

      RepoSet.new(repositories_directory).for_each_repo do |repo|
        repo.commits(since: cutoff).each { |commit| gathered << commit }
      end

      gathered
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require 'forwardable'
2
+
3
module Twigg
  # A PairMatrix is initialized with a {CommitSet} instance and computes
  # pairing information for those commits.
  class PairMatrix
    extend Forwardable
    def_delegators :pairs, :[], :keys

    # `commit_set` needs to respond to `each`, yielding objects that respond
    # to `author_names`.
    def initialize(commit_set)
      @commit_set = commit_set
    end

    # Returns a sparse matrix representing the pairing permutations, and commit
    # counts for each, in the receiver.
    #
    # The returned matrix is a Hash data structure and can be queried like so:
    #
    #   pm['Joe Lencioni']['Noah Silas']   #=> 3 (commit count by the pair)
    #   pm['Tony Wooster']['Tony Wooster'] #=> 9 (commit count as solo author)
    #   pm['Joe Lencioni']['Tony Wooster'] #=> 0 (no commits, no pairing)
    #
    # Note that the {#[]} method is forwarded to the underlying Hash, which
    # means that the above examples work equally well whether `pm` is an
    # instance of a {PairMatrix} or the result of a call to the {#pairs}
    # method on a {PairMatrix} instance.
    def pairs
      @pairs ||= sparse_matrix.tap do |matrix|
        @commit_set.each do |commit|
          authors = commit.author_names

          # if you're solo, that's equivalent to pairing with yourself
          authors *= 2 if authors.size == 1

          # every ordered pair is counted once per commit, so the matrix is
          # symmetric: matrix[a][b] == matrix[b][a]
          authors.permutation(2).to_a.uniq.each do |pairer, pairee|
            matrix[pairer][pairee] += 1
          end
        end
      end
    end

    # Returns a sorted array of names corresponding to the authors represented
    # in the matrix.
    def authors
      @authors ||= pairs.keys.sort
    end

    # Scan the matrix, identifying and returning the "solo" element (ie. one
    # person working alone) with the highest number of commits.
    #
    # Returns 0 if there are no solo commits.
    def max_solo
      @max_solo ||= pairs.map { |author, row| row.fetch(author, 0) }.max || 0
    end

    # Scan the matrix, identifying and returning the "pair" element (ie. two
    # distinct people pairing) with the highest number of commits.
    #
    # Returns 0 if there are no paired commits.
    def max_pair
      @max_pair ||= pairs.flat_map do |pairer, row|
        row.reject { |pairee, _count| pairee == pairer }.values
      end.max || 0
    end

  private

    # Returns a Hash instance that models a sparse matrix.
    #
    # Looking up a pairee/pairer pair in the matrix returns 0 if the matrix does
    # not have a value for that entry; for example:
    #
    #   sparse_matrix['Jimmy Kittiyachavalit']['Chris Chan'] #=> 0
    #
    # Reading a missing pairee does not store anything in the row. Reading an
    # unknown pairer still creates an (empty) row for it, which is what lets
    # `matrix[pairer][pairee] += 1` work while the matrix is being built.
    def sparse_matrix
      Hash.new { |matrix, pairer| matrix[pairer] = Hash.new(0) }
    end
  end
end
data/lib/twigg/repo.rb ADDED
@@ -0,0 +1,134 @@
1
+ require 'date'
2
+ require 'pathname'
3
+
4
module Twigg
  # Abstraction around a Git repository on disk.
  class Repo
    class InvalidRepoError < RuntimeError; end

    # Given `path` to a Git repository on disk sets up a `Repo` instance.
    #
    # Raises an {InvalidRepoError} if `path` does not point to the top level of
    # an existent Git repo.
    def initialize(path)
      @path = Pathname.new(path)
      raise InvalidRepoError unless valid?
    end

    # Returns an array of {Commit} objects parsed from `git log`.
    #
    # There are a number of keyword arguments that correspond to the options of
    # the same name to `git log`:
    #
    #   - `all:`  : return reachable commits from all branches, not just HEAD
    #   - `since:`: only return commits made since this Time
    #
    # Results are memoized per combination of arguments.
    def commits(all: true, since: nil)
      args = []
      args << '--all' if all
      args << "--since=#{since.to_i}" if since
      @commits ||= {}
      @commits[args] ||= parse_log(log(*args))
    end

    # Returns the name of the repo.
    #
    # The name is inferred from the final component of the repo path.
    def name
      @path.basename.to_s
    end

    # Returns the GitHub URL for the repo, or `nil` if no GitHub organization
    # is configured.
    def link
      if Config.github.organization
        "https://github.com/#{Config.github.organization}/#{name}"
      end
    end

  private

    # Spawn option (in the trailing-options form that `IO.popen` accepts) that
    # redirects the child's standard error to its standard output.
    STDERR_TO_STDOUT = [err: [:child, :out]].freeze

    # Returns the path that Git accepts as GIT_DIR for this repo, or `nil` if
    # neither candidate is a Git directory.
    def git_dir
      @git_dir ||= begin
        # first try repo "foo" (bare repo), then "foo/.git" (non-bare repo)
        [@path, @path + '.git'].map(&:to_s).find do |path|
          # The block form drains the pipe, closes it and waits for the child,
          # setting `$?`, so no file descriptor is left open.
          IO.popen({ 'GIT_DIR' => path },
                   %w[git rev-parse --git-dir] + STDERR_TO_STDOUT, &:read)
          $?.success?
        end
      end
    end

    # Check to see if this is a valid repo:
    #
    #   - the repo path should exist
    #   - the path should point to the top level of the repo
    #   - the check should work for both bare and non-bare repos
    #
    # Delegates to `#git_dir`
    alias :valid? :git_dir

    # Runs the Git command, `command`, with args `args`, and returns its
    # output (standard output and standard error combined) as a String.
    def git(command, *args)
      IO.popen([{ 'GIT_DIR' => git_dir },
                'git', command, *args, *STDERR_TO_STDOUT], 'r') do |io|
        io.read
      end
    end

    # Runs `git log` with `args`, in the line-oriented format that
    # `#parse_log` consumes, and returns the raw output.
    def log(*args)
      format = [
        '%H',           # commit hash
        '%n',           # newline
        '%aN',          # author name (respecting .mailmap)
        '%n',           # newline
        '%ct',          # committer date, UNIX timestamp
        '%n',           # newline
        '%s',           # subject
        '%n',           # newline
        '%w(0,4,4)%b',  # body, indented 4 spaces
      ].join

      git 'log', "--pretty=format:#{format}", '--numstat', *args
    end

    # Parses the output of `#log` and returns an array of {Commit} objects.
    #
    # Each record is: hash, author, timestamp and subject (one line each), any
    # number of body lines indented by 4 spaces, an optional blank line, any
    # number of `--numstat` lines and another optional blank line.
    def parse_log(string)
      [].tap do |commits|
        lines = string.each_line
        loop do
          begin
            commit = { repo: self }
            commit[:commit]  = lines.next.chomp
            commit[:author]  = lines.next.chomp
            commit[:date]    = Time.at(lines.next.chomp.to_i).to_date
            # running out of input here leaves an empty subject rather than
            # aborting the record
            commit[:subject] = lines.next.chomp rescue ''

            commit[:body] = []
            # `$~` holds the match from the loop condition
            while lines.peek =~ /^ {4}(.*)$/ && lines.next
              commit[:body] << $~[1]
            end
            commit[:body] = commit[:body].join("\n")
            lines.next if lines.peek == "\n" # blank separator line

            commit[:stat] = Hash.new(0)
            # a "-" count (as seen for binary files) converts to 0
            while lines.peek =~ /^(\d+|-)\t(\d+|-)\t.+$/ && lines.next
              commit[:stat][:additions] += $~[1].to_i
              commit[:stat][:deletions] += $~[2].to_i
            end
            lines.next if lines.peek == "\n" # blank separator line

          rescue StopIteration
            break # end of output
          ensure
            # runs for every record, including the one cut short by the end of
            # the output
            #
            # if the underlying repo is bad (eg. no commits yet) this could
            # raise an ArgumentError, so we rescue
            commit = Commit.new(commit) rescue nil
            commits << commit if commit
          end
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'pathname'
2
+
3
module Twigg
  # Represents a set of Git repositories existing in a directory.
  class RepoSet
    def initialize(repositories_directory)
      @repositories_directory = Pathname.new(repositories_directory)
    end

    # Execute `block` for each repo in the set.
    #
    # The {Repo} object is passed in to the block. Returns the array of repos,
    # or an Enumerator over them if no block is given.
    def for_each_repo(&block)
      repos.each(&block)
    end

    # Returns an array with a {Repo} for every entry directly inside the
    # repositories directory that {Repo} accepts; the result is memoized.
    def repos
      @repos ||= begin
        Dir[File.join(@repositories_directory, '*')].map do |path|
          begin
            Repo.new(path)
          rescue Repo::InvalidRepoError
            nil # most likely an empty or non-Git directory
          end
        end.compact
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module Twigg
  class RussianNovel
    # Wraps a {CommitSet} and turns it into the data behind a d3 bubble chart:
    #
    #   https://github.com/mbostock/d3/wiki/Pack-Layout
    #   http://bl.ocks.org/mbostock/4063269
    #
    # A bubble chart suits the "Russianness" of an author's commit messages
    # well:
    #
    #   - size:  commit message line count (also known as "Russianness")
    #   - text:  author name
    #   - hover: detailed stats on "Russianness", Flesch Reading Ease score,
    #            author and team name
    #   - color: team
    #
    def initialize(commit_set)
      @commit_set = commit_set
    end

    # Returns (and memoizes) a Hash whose 'children' key holds one entry per
    # author, in the order given by the commit set's `authors`.
    def data
      return @data if @data

      author_teams = Team.author_to_team_map
      bubbles = @commit_set.authors.map do |entry|
        name    = entry[:author]
        commits = entry[:commit_set]

        {
          'author'              => name,
          'russianness'         => commits.russianness,
          'flesch_reading_ease' => commits.flesch_reading_ease,
          'team'                => author_teams[name],
        }
      end

      @data = { 'children' => bubbles }
    end
  end
end