twigg 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,137 @@
1
+ require 'forwardable'
2
+ require 'set'
3
+
4
module Twigg
  # A collection of commits, with helpers for aggregating statistics over the
  # collection and for slicing it by author, team, repo and day.
  #
  # Aggregate values (`additions`, `deletions`, `flesch_reading_ease`,
  # `russianness`, `authors`) are memoized on first use and are not recomputed
  # if commits are appended afterwards via `<<`.
  class CommitSet
    extend Forwardable
    def_delegators :commits, :any?, :count, :each, :inject, :<<
    attr_reader :commits

    # `commits` is the Array of commit objects to wrap (empty by default).
    def initialize(commits = [])
      @commits = commits
    end

    # Returns the sum of `stat[:additions]` over all commits.
    def additions
      @additions ||= inject(0) do |memo, commit|
        memo + commit.stat[:additions]
      end
    end

    # Returns the sum of `stat[:deletions]` over all commits.
    def deletions
      @deletions ||= inject(0) do |memo, commit|
        memo + commit.stat[:deletions]
      end
    end

    # Returns the mean of `flesch_reading_ease` over all commits.
    #
    # Raises ZeroDivisionError if the set is empty.
    def flesch_reading_ease
      @flesch_reading_ease ||= inject(0) do |memo, commit|
        memo + commit.flesch_reading_ease
      end / count
    end

    # Returns the sum of `russianness` over all commits.
    def russianness
      @russianness ||= inject(0) do |memo, commit|
        memo + commit.russianness
      end
    end

    # Returns an array of `{ date:, count: }` hashes, one for each day from
    # `days` days ago up to and including today, where `count` is the number of
    # commits whose `date` equals that day.
    def count_by_day(days)
      start_date = Date.today - days
      end_date = Date.today
      date_to_commits = @commits.group_by { |commit| commit.date }
      (start_date..end_date).map do |date|
        { date: date, count: date_to_commits.fetch(date, []).count }
      end
    end

    # Returns a new set containing the commits of the receiver followed by
    # those of `commit_set`, with duplicates removed.
    #
    # Neither operand is modified. A fresh instance is built (rather than
    # calling `dup`, which would share the underlying commits array and copy
    # already-memoized totals).
    #
    # Raises TypeError if `commit_set` is not a CommitSet.
    def +(commit_set)
      unless commit_set.is_a?(CommitSet)
        raise TypeError, "expected Twigg::CommitSet, got #{commit_set.class}"
      end

      self.class.new((commits + commit_set.commits).uniq)
    end

    # Returns an array of `{ repo:, count: }` hashes, ordered by descending
    # commit count.
    def count_by_repo
      counts = Hash.new(0)
      each { |commit| counts[commit.repo] += 1 }
      counts.sort_by { |repo, count| -count }.
        map { |repo, count| { repo: repo, count: count } }
    end

    # Returns a new set holding only the commits whose `author_names` include
    # `author`.
    def select_author(author)
      commits_for_author = @commits.select do |commit|
        commit.author_names.include?(author)
      end

      self.class.new(commits_for_author)
    end

    # Returns a new set holding only the commits with at least one author
    # listed under `team` in `Config.teams`.
    def select_team(team)
      members = Set.new(Config.teams[team])

      commits_for_team = @commits.select do |commit|
        commit.author_names.any? { |author| members.include?(author) }
      end

      self.class.new(commits_for_team)
    end

    # Returns an array of `{ author:, commit_set: }` hashes, one per author
    # name, ordered by descending commit count.
    def authors
      @authors ||= author_to_commit_set.
        sort_by { |author, commit_set| -commit_set.count }.
        map { |author, commit_set| { author: author, commit_set: commit_set } }
    end

    # Returns a sparse pairing "matrix".
    #
    # Keys are pairer names. Values are hashes of pairees-to-count maps.
    def pairs
      PairMatrix.new(self)
    end

    # Returns an array of `{ author:, commit_set:, authors: }` hashes, one per
    # team in `Config.teams` that has at least one commit, ordered by
    # descending commit count. Authors not claimed by any team are gathered
    # into a final entry named `Team::OTHER_TEAM_NAME`.
    def teams
      set = author_to_commit_set

      teams = Config.teams.each_pair.map do |team, members|
        # Claim each member's commits (removing them from `set`, so that what
        # remains afterwards belongs to no team) and merge them together.
        commits = members.inject(self.class.new) do |commit_set, member|
          member_commits = set.delete(member)
          member_commits ? commit_set + member_commits : commit_set
        end

        if commits.any?
          {
            author:     team.to_s,
            commit_set: commits,
            authors:    members,
          }
        end
      end.compact.sort_by { |team| -team[:commit_set].count }

      unless set.empty?
        teams << {
          author:     Team::OTHER_TEAM_NAME,
          commit_set: set.values.inject(self.class.new, :+),
          authors:    set.keys,
        }
      end

      teams
    end

  private

    # Returns a Hash mapping each author name to a CommitSet of the commits
    # that list that name among their `author_names`.
    def author_to_commit_set
      Hash.new { |h, k| h[k] = self.class.new }.tap do |set|
        each do |commit|
          commit.author_names.each { |author_name| set[author_name] << commit }
        end
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
+ require 'forwardable'
2
+ require 'shellwords'
3
+ require 'yaml'
4
+
5
module Twigg
  # The Config class mediates all access to the Twigg config file.
  #
  # A config file named with the `-c`/`--config` command-line switch takes
  # precedence. Otherwise we look for a YAML file at the location specified by
  # the TWIGGRC environment variable. If that isn't set, we fall back to
  # looking for a config file at `~/.twiggrc`.
  #
  # Example use:
  #
  #   Config.bind                     # the bind address for the Twigg web app
  #                                   # [default: 0.0.0.0]
  #   Config.gerrit.host              # the (optional) Gerrit hostname
  #                                   # [default: localhost]
  #   Config.gerrit.port              # the (optional) Gerrit port
  #                                   # [default: 29418]
  #   Config.gerrit.user              # the (optional) Gerrit username
  #                                   # [default: $USER environment variable]
  #   Config.repositories_directory   # where to find repositories
  #
  class Config
    include Console

    class << self
      # For convenience, forward all messages to the underlying {Config}
      # instance. This allows us to write things like `Config.bind` instead of
      # the more verbose `Config.config.bind`.
      extend Forwardable
      def_delegators :config, :method_missing

    private

      # Maintain a "singleton" Config instance for convenient access.
      def config
        @config ||= new
      end
    end

    # Builds the settings from the first source that yields a config:
    # command-line switch, then environment variable, then home directory.
    def initialize
      source = config_from_argv || config_from_env || config_from_home
      @settings = Settings.new(source)
    end

  private

    # Forward all messages to the underlying {Settings} instance.
    def method_missing(method, *args, &block)
      @settings.send(method, *args, &block)
    end

    # Loads and returns the YAML document at `path`, warning on standard error
    # if the file is world-readable.
    def config_from_file(path)
      contents = YAML.load_file(path)
      warn_if_world_readable(path)
      contents
    end

    # Emits a warning, plus a suggested fix, if `path` is world-readable.
    def warn_if_world_readable(path)
      return unless File.world_readable?(path)

      warn "#{path} is world-readable"
      stderr strip_heredoc(<<-DOC)

        The Twigg config file may contain sensitive information, such as
        access credentials for external services.

        Suggested action: tighten the filesystem permissions with:

        chmod 600 #{Shellwords.escape path}

      DOC
    end

    def config_from_argv
      # It is a bit of a smell to have the Config class know about argument
      # processing, but, at least in development, Bundler will end up eagerly
      # loading the config when it evaluates the Gemfile (and hence the
      # twigg-app.gemspec), which means that this happens before the
      # Twigg::Command.run method gets a chance to set things up properly.
      path = consume_option(%w[-c --config], ARGV)
      config_from_file(path) if path
    end

    # Loads the file named by the TWIGGRC environment variable, if it is set.
    def config_from_env
      path = ENV['TWIGGRC']
      config_from_file(path) if path
    end

    TWIGGRC = '.twiggrc'

    # Loads `~/.twiggrc`, or returns an empty Hash if there is no such file.
    def config_from_home
      config_from_file(File.join(Dir.home, TWIGGRC))
    rescue Errno::ENOENT
      {} # no custom config; assume defaults
    end
  end
end
@@ -0,0 +1,68 @@
1
module Twigg
  # A collection of useful methods for code that is running in a console.
  #
  # Functionality includes printing, process lifecycle management and
  # formatting.
  module Console
    extend self

  private

    # Print `msgs` to standard error
    def stderr(*msgs)
      STDERR.puts(*msgs)
    end

    # Exit with an exit code of 1, printing the optional `msg`, prefixed with
    # "error: ", to standard error if present
    def die(msg = nil)
      error(msg) if msg
      exit 1
    end

    # Print `msg` to the standard error, prefixed with "error: "
    def error(msg)
      stderr("error: #{msg}")
    end

    # Print `msg` to the standard error, prefixed with "warning: "
    def warn(msg)
      stderr("warning: #{msg}")
    end

    # Given a "heredoc" `doc`, find the non-empty line with the smallest indent,
    # and strip that amount of whitespace from the beginning of each line.
    #
    # This allows us to write nicely indented copy that sits well with the
    # surrounding code, irrespective of the level of indentation of the code,
    # without emitting excessive whitespace to the user at runtime.
    def strip_heredoc(doc)
      # leading whitespace of every line that has some non-blank content
      indents = doc.scan(/^[ \t]*(?=\S)/)
      smallest = indents.map { |whitespace| whitespace.size }.min || 0
      doc.gsub(/^[ \t]{#{smallest}}/, '')
    end

    # Given `switches` (which may be either a single switch or an array of
    # switches) and an array of arguments, `args`, scans through the arguments
    # looking for the switches and the corresponding values.
    #
    # This can be used, for example, to extract the value "/etc/twiggrc" from an
    # argument list like "--verbose --debug --config /etc/twiggrc help".
    #
    # In the event that the switches appear multiple times in the list, the
    # right-most wins. If a switch is found without a corresponding option an
    # exception is raised.
    #
    # Consumes matching options (ie. deletes them from `args`) and returns the
    # corresponding (rightmost) value, or `nil` in the event there is no match.
    def consume_option(switches, args)
      candidates = Array(switches)
      found = nil

      # consume from left to right; rightmost will win
      loop do
        position = args.find_index { |arg| candidates.include?(arg) }
        break unless position

        switch, found = args.slice!(position, 2)
        raise ArgumentError, "missing option (expected after #{switch})" unless found
      end

      found
    end
  end

  Console.public_class_method :die
end
@@ -0,0 +1,12 @@
1
module Twigg
  # Mixin for code that relies on optional gems.
  module Dependency
  private

    # Requires `gem` and then yields to the given block, returning the block's
    # value.
    #
    # If a LoadError is raised, whether by the `require` itself or from inside
    # the block, the process is terminated via `Console.die` with a hint to
    # install `gem`.
    def with_dependency(gem, &block)
      begin
        require gem
        yield
      rescue LoadError => load_error
        Console.die "#{load_error}: try `gem install #{gem}`"
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
module Twigg
  # Class which computes an approximation of the Flesch Reading Ease metric for
  # a given piece of English-language text.
  #
  # @see {http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests}
  class Flesch
    # `string` is the text to be scored.
    def initialize(string)
      @string = string
    end

    # Returns the reading ease score as a Float. Text for which the score is
    # undefined (no words) scores 206.835, the constant term of the formula.
    def reading_ease
      # from wikipedia:
      ease = 206.835 -
        1.015 * (total_words / total_sentences.to_f) -
        84.6 * (total_syllables / total_words.to_f)

      # beware NaN values (usually caused by empty commit messages),
      # incompatible with JSON
      ease.nan? ? 206.835 : ease
    end

  private

    # Returns approximate count of words in the receiver.
    def total_words
      words.size
    end

    # Returns an array of "words" in the receiver. "Words" are defined as
    # strings of consecutive "word" characters (as defined by the regex
    # short-hand, `\w`).
    def words
      @words ||= @string.split(/\b/).select { |w| w =~ /\w/ }
    end

    # Returns approximate total count of sentences in the receiver: the number
    # of pieces obtained by splitting the text on runs of periods.
    def total_sentences
      @string.split(/\.+/).size
    end

    # Returns approximate total count of syllables in the receiver.
    def total_syllables
      words.inject(0) { |memo, word| memo + syllables(word) }
    end

    # Returns an approximate syllable count for `word`.
    #
    # Based on: {http://stackoverflow.com/questions/1271918/ruby-count-syllables}
    def syllables(word)
      # words of 3 letters or less count as 1 syllable; rare exceptions (eg.
      # "ion") are not handled
      return 1 if word.size <= 3

      # - ignore final es, ed, e (except for le)
      # - consecutive vowels count as one syllable
      word.
        downcase.
        gsub(/\W+/, ' ').                            # suppress punctuation
        sub(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, '').
        sub(/^y/, '').
        scan(/[aeiouy]{1,2}/).
        size
    end
  end
end
@@ -0,0 +1,15 @@
1
module Twigg
  # Collects commits from every repository found under a directory.
  module Gatherer
    # Returns a {CommitSet} holding the commits made during the last `days`
    # days in each repo under `repositories_directory`.
    def self.gather(repositories_directory, days)
      cutoff = Time.now - days * 24 * 60 * 60
      gathered = CommitSet.new

      RepoSet.new(repositories_directory).for_each_repo do |repo|
        repo.commits(since: cutoff).each { |commit| gathered << commit }
      end

      gathered
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require 'forwardable'
2
+
3
module Twigg
  # A PairMatrix is initialized with a {CommitSet} instance and computes
  # pairing information for those commits.
  class PairMatrix
    extend Forwardable
    def_delegators :pairs, :[], :keys

    def initialize(commit_set)
      @commit_set = commit_set
    end

    # Returns a sparse matrix representing the pairing permutations, and commit
    # counts for each, in the receiver.
    #
    # The returned matrix is a Hash data structure and can be queried like so:
    #
    #   pm['Joe Lencioni']['Noah Silas']   #=> 3 (commit count by the pair)
    #   pm['Tony Wooster']['Tony Wooster'] #=> 9 (commit count as solo author)
    #   pm['Joe Lencioni']['Tony Wooster'] #=> 0 (no commits, no pairing)
    #
    # Note that the {#[]} method is forwarded to the underlying Hash, which
    # means that the above examples work equally well whether `pm` is an
    # instance of a {PairMatrix} or the result of a call to the {#pairs}
    # method on a {PairMatrix} instance.
    def pairs
      return @pairs if @pairs

      matrix = sparse_matrix
      @commit_set.each do |commit|
        names = commit.author_names

        # if you're solo, that's equivalent to pairing with yourself
        names *= 2 if names.size == 1

        names.permutation(2).to_a.uniq.each do |pairer, pairee|
          matrix[pairer][pairee] += 1
        end
      end

      @pairs = matrix
    end

    # Returns a sorted array of names corresponding to the authors represented
    # in the matrix.
    def authors
      @authors ||= pairs.keys.sort
    end

    # Scan the matrix, identifying and returning the "solo" element (ie. one
    # person working alone) with the highest number of commits.
    def max_solo
      @max_solo ||= max_count { |pairee, pairer| pairee == pairer }
    end

    # Scan the matrix, identifying and returning the "pair" element (ie. two
    # distinct people pairing) with the highest number of commits.
    def max_pair
      @max_pair ||= max_count { |pairee, pairer| pairee != pairer }
    end

  private

    # Returns the largest count among the matrix cells for which the given
    # block, called with the two names of the cell, is truthy; 0 if none.
    def max_count
      pairs.inject(0) do |best, (pairee, row)|
        row_best = row.inject(0) do |row_max, (pairer, count)|
          [yield(pairee, pairer) ? count : 0, row_max].max
        end

        [row_best, best].max
      end
    end

    # Returns a Hash instance that models a sparse matrix.
    #
    # Looking up a pairee/pairer pair in the matrix returns 0 if the matrix does
    # not have a value for that entry; for example:
    #
    #   sparse_matrix['Jimmy Kittiyachavalit']['Chris Chan'] #=> 0
    #
    def sparse_matrix
      Hash.new do |matrix, pairer|
        matrix[pairer] = Hash.new { |row, pairee| row[pairee] = 0 }
      end
    end
  end
end
data/lib/twigg/repo.rb ADDED
@@ -0,0 +1,134 @@
1
+ require 'date'
2
+ require 'pathname'
3
+
4
module Twigg
  # Abstraction around a Git repository on disk.
  class Repo
    class InvalidRepoError < RuntimeError; end

    # Given `path` to a Git repository on disk sets up a `Repo` instance.
    #
    # Raises an {InvalidRepoError} if `path` does not point to the top level of
    # an existent Git repo.
    def initialize(path)
      @path = Pathname.new(path)
      raise InvalidRepoError unless valid?
    end

    # Returns an array of {Commit} objects reachable from the HEAD of the repo.
    #
    # There are a number of keyword arguments that correspond to the options of
    # the same name to `git log`:
    #
    #   - `all:`  : return reachable commits from all branches, not just HEAD
    #   - `since:`: only return commits made since this Time
    #
    # Results are cached per distinct combination of arguments.
    def commits(all: true, since: nil)
      args = []
      args << '--all' if all
      args << "--since=#{since.to_i}" if since
      @commits ||= {}
      @commits[args] ||= parse_log(log(*args))
    end

    # Returns the name of the repo.
    #
    # The name is inferred from the final component of the repo path.
    def name
      @path.basename.to_s
    end

    # Returns the GitHub URL for the repo, or `nil` if no GitHub organization
    # is configured.
    def link
      if Config.github.organization
        "https://github.com/#{Config.github.organization}/#{name}"
      end
    end

  private

    # Spawn options (wrapped in an array so that they can be appended to, or
    # splatted into, a command array) that fold the child's standard error
    # into its standard output.
    STDERR_TO_STDOUT = [err: [:child, :out]]

    # Returns the first candidate Git directory for which
    # `git rev-parse --git-dir` succeeds, or `nil` if none does.
    def git_dir
      # first try repo "foo" (bare repo), then "foo/.git" (non-bare repo)
      @git_dir ||= [@path, @path + '.git'].map(&:to_s).find do |path|
        # The block form of IO.popen closes the pipe and reaps the child
        # (setting `$?`) on exit, so no file descriptor is left open. The
        # command's output is read only to drain the pipe.
        IO.popen({ 'GIT_DIR' => path },
                 %w[git rev-parse --git-dir] + STDERR_TO_STDOUT, &:read)
        $?.success?
      end
    end

    # Check to see if this is a valid repo:
    #
    #   - the repo path should exist
    #   - the path should point to the top level of the repo
    #   - the check should work for both bare and non-bare repos
    #
    # Delegates to `#git_dir`
    alias :valid? :git_dir

    # Runs the Git command, `command`, with args `args`, returning its
    # combined standard output and standard error as a String.
    def git(command, *args)
      IO.popen([{ 'GIT_DIR' => git_dir },
                'git', command, *args, *STDERR_TO_STDOUT], 'r') do |io|
        io.read
      end
    end

    # Runs `git log` with a fixed, line-oriented format (understood by
    # `#parse_log`) plus `--numstat` and any extra `args`.
    def log(*args)
      format = [
        '%H',          # commit hash
        '%n',          # newline
        '%aN',         # author name (respecting .mailmap)
        '%n',          # newline
        '%ct',         # committer date, UNIX timestamp
        '%n',          # newline
        '%s',          # subject
        '%n',          # newline
        '%w(0,4,4)%b', # body, indented 4 spaces
      ].join

      git 'log', "--pretty=format:#{format}", '--numstat', *args
    end

    # Parses `string`, the output of `#log`, into an array of {Commit}
    # objects.
    #
    # Each record consists of hash, author, timestamp and subject lines,
    # followed by any number of body lines (indented by 4 spaces) and any
    # number of numstat lines ("added<TAB>deleted<TAB>path"), each group
    # optionally followed by a blank separator line.
    def parse_log(string)
      [].tap do |commits|
        lines = string.each_line
        loop do
          begin
            commit = { repo: self }
            commit[:commit]  = lines.next.chomp
            commit[:author]  = lines.next.chomp
            commit[:date]    = Time.at(lines.next.chomp.to_i).to_date
            commit[:subject] = lines.next.chomp rescue ''

            commit[:body] = []
            while lines.peek =~ /^ {4}(.*)$/ && lines.next
              commit[:body] << $~[1]
            end
            commit[:body] = commit[:body].join("\n")
            lines.next if lines.peek == "\n" # blank separator line

            commit[:stat] = Hash.new(0)
            while lines.peek =~ /^(\d+|-)\t(\d+|-)\t.+$/ && lines.next
              # a "-" count converts to 0 via to_i
              commit[:stat][:additions] += $~[1].to_i
              commit[:stat][:deletions] += $~[2].to_i
            end
            lines.next if lines.peek == "\n" # blank separator line

          rescue StopIteration
            break # end of output
          ensure
            # This runs on every pass, including the one that hits the end of
            # the output, which is how the final commit gets recorded.
            #
            # if the underlying repo is bad (eg. no commits yet) this could
            # raise an ArgumentError, so we rescue
            commit = Commit.new(commit) rescue nil
            commits << commit if commit
          end
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'pathname'
2
+
3
module Twigg
  # Represents a set of Git repositories existing in a directory.
  class RepoSet
    def initialize(repositories_directory)
      @repositories_directory = Pathname.new(repositories_directory)
    end

    # Execute `block` for each repo in the set.
    #
    # The {Repo} object is passed in to the block.
    def for_each_repo(&block)
      repos.each { |repo| block.call(repo) }
    end

    # Returns an array with a {Repo} for each entry directly inside the
    # repositories directory that is a valid repo; other entries are skipped.
    def repos
      @repos ||= begin
        pattern = File.join(@repositories_directory, '*')
        Dir[pattern].map { |path| repo_at(path) }.compact
      end
    end

  private

    # Returns a {Repo} for `path`, or `nil` if `path` is not a valid repo.
    def repo_at(path)
      Repo.new(path)
    rescue Repo::InvalidRepoError
      # most likely an empty or non-Git directory
      nil
    end
  end
end
@@ -0,0 +1,40 @@
1
module Twigg
  class RussianNovel
    # The class takes a {CommitSet} and produces data that can be used to
    # produce a d3 bubble chart:
    #
    #   https://github.com/mbostock/d3/wiki/Pack-Layout
    #   http://bl.ocks.org/mbostock/4063269
    #
    # The bubble chart is an excellent format for representing the
    # "Russianness" of an author's commit messages:
    #
    #   - size:  commit message line count (also known as "Russianness")
    #   - text:  author name
    #   - hover: detailed stats on "Russianness", Flesch Reading Ease score,
    #            author and team name
    #   - color: team
    #
    def initialize(commit_set)
      @commit_set = commit_set
    end

    # Returns Russian Novel data in a d3-friendly format.
    def data
      @data ||= { 'children' => children }
    end

  private

    # Returns one bubble (a Hash with String keys) per author in the commit
    # set.
    def children
      team_map = Team.author_to_team_map

      @commit_set.authors.map do |entry|
        author  = entry[:author]
        commits = entry[:commit_set]

        {
          'author'              => author,
          'russianness'         => commits.russianness,
          'flesch_reading_ease' => commits.flesch_reading_ease,
          'team'                => team_map[author],
        }
      end
    end
  end
end