email_graph 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +152 -0
- data/Rakefile +2 -0
- data/email_graph.gemspec +28 -0
- data/lib/email_graph.rb +7 -0
- data/lib/email_graph/directed_graph.rb +122 -0
- data/lib/email_graph/gmail_fetcher.rb +102 -0
- data/lib/email_graph/interaction_graph.rb +114 -0
- data/lib/email_graph/undirected_graph.rb +67 -0
- data/lib/email_graph/version.rb +3 -0
- data/spec/directed_graph_spec.rb +90 -0
- data/spec/interaction_graph_spec.rb +90 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/undirected_graph_spec.rb +53 -0
- metadata +135 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 60c190e3910f79ec5129c9cca0b0227b87390e68
|
4
|
+
data.tar.gz: 2fe939d783d01402f1acecd9fac40aeda554d815
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 53e992b88cff05752d47163afb3d0f0d26c71eb788f3f4c75e67f722a8ff3a325a1b2016e1ac17644571d79a98b8089a2be64cfeae45d862e1e3e7daf27a4232
|
7
|
+
data.tar.gz: 97e5b6f60728506c480982ea02de8a921012daef7a1100632c90f5d0fba9ad1e06a25cf960319b1bfa678dcacf9131889f50d5063ac25f9132edef622418d19c
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Ryan Dick
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
# EmailGraph
|
2
|
+
|
3
|
+
Build and analyze graph data structures from email history.
|
4
|
+
|
5
|
+
Focus is on identities and interactions between them, for example:
|
6
|
+
* Who do I email with the most?
|
7
|
+
* Who are my strong contacts, as identified by some two-way interaction
|
8
|
+
threshold function?
|
9
|
+
* What is the distribution of my email interactions with a given person over
|
10
|
+
time?
|
11
|
+
|
12
|
+
Subject lines and message bodies are currently ignored for simplification.
|
13
|
+
|
14
|
+
## Installation
|
15
|
+
|
16
|
+
Add this line to your application's Gemfile:
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
gem 'email_graph'
|
20
|
+
```
|
21
|
+
|
22
|
+
And then execute:
|
23
|
+
|
24
|
+
$ bundle
|
25
|
+
|
26
|
+
Or install it yourself as:
|
27
|
+
|
28
|
+
$ gem install email_graph
|
29
|
+
|
30
|
+
## Graph types
|
31
|
+
|
32
|
+
There are two types of graphs to be built from email data.
|
33
|
+
|
34
|
+
### 1. Interaction graph
|
35
|
+
|
36
|
+
Class: `EmailGraph::InteractionGraph`
|
37
|
+
|
38
|
+
This is a directed graph where each vertex is an email address and each edge is
|
39
|
+
an instance of `EmailGraph::InteractionRelationship` - a directed interaction
|
40
|
+
history between two emails.
|
41
|
+
|
42
|
+
The graph has these properties:
|
43
|
+
* Implements common graph methods (e.g., `#vertices`, `#edges`)
|
44
|
+
* Efficient fetching of vertices' in-edges (not always a default of graph
|
45
|
+
structures)
|
46
|
+
* Loops allowed
|
47
|
+
|
48
|
+
Given a message, an interaction is created from the sender to every address in
|
49
|
+
the `to`, `cc`, and `bcc` fields - there is no distinction among the latter.
|
50
|
+
|
51
|
+
`EmailGraph::InteractionRelationship` objects have an `interactions` attribute,
|
52
|
+
which holds an array of the `Time` objects of the interactions.
|
53
|
+
|
54
|
+
Example:
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
# Assuming you have an array 'messages' of message-like objects (see below for
|
58
|
+
# how to use the Gmail fetcher)
|
59
|
+
g = EmailGraph::InteractionGraph.new
|
60
|
+
messages.each{ |m| g.add_message(m) }
|
61
|
+
|
62
|
+
# ...or...
|
63
|
+
|
64
|
+
g = EmailGraph::InteractionGraph.new(messages: messages)
|
65
|
+
|
66
|
+
# For example, see a sorted list of your email contacts by emails sent
|
67
|
+
g.edges_from("your@emailhere.com")
|
68
|
+
.sort_by{ |e| -e.interactions.size }
|
69
|
+
.map{ |e| [e.to, e.interactions.size] }
|
70
|
+
```
|
71
|
+
|
72
|
+
### 2. Mutual relationship graph
|
73
|
+
|
74
|
+
Class: `EmailGraph::UndirectedGraph` (just uses the abstract class)
|
75
|
+
|
76
|
+
This is an undirected graph where each vertex is also an email address; this
|
77
|
+
time, the edges are instances of `EmailGraph::MutualRelationship` - an
|
78
|
+
undirected edge that similarly includes an interaction history (though an
|
79
|
+
undirected one).
|
80
|
+
|
81
|
+
This graph is created from an `EmailGraph::InteractionGraph` by creating
|
82
|
+
undirected edges from pairs of directed edge inverses. Optionally, a filter can
|
83
|
+
be applied during this process to determine whether an undirected edge is
|
84
|
+
created for a given pair of directed edges.
|
85
|
+
|
86
|
+
```ruby
|
87
|
+
g = EmailGraph::InteractionGraph.new(messages: messages)
|
88
|
+
|
89
|
+
# This creates the graph using the default filter, which is that an edge has to
|
90
|
+
# have an inverse in order to create a new undirected edge.
|
91
|
+
mg = g.to_mutual_graph
|
92
|
+
|
93
|
+
# Alternatively, you can specify a custom filter. For example, this replicates
|
94
|
+
# the one used by A. Chapanond et al. in their analysis of emails* from the Enron
|
95
|
+
# case data set
|
96
|
+
filter = Proc.new do |e, e_inverse|
|
97
|
+
if e && e_inverse
|
98
|
+
counts = [e.interactions.size, e_inverse.interactions.size]
|
99
|
+
counts.all?{ |c| c >= 6 } && counts.inject(:+) >= 30
|
100
|
+
else
|
101
|
+
false
|
102
|
+
end
|
103
|
+
end
|
104
|
+
mg = g.to_mutual_graph(&filter)
|
105
|
+
```
|
106
|
+
|
107
|
+
\*Chapanond, Anurat, Mukkai S. Krishnamoorthy, and Bülent Yener. "Graph theoretic
|
108
|
+
and spectral analysis of Enron email data." Computational & Mathematical
|
109
|
+
Organization Theory 11.3 (2005): 265-281.
|
110
|
+
|
111
|
+
## Email normalization
|
112
|
+
|
113
|
+
You'll likely want to normalize email addresses before adding them to a graph.
|
114
|
+
Otherwise, you'll end up with separate vertices for different capitalizations of
|
115
|
+
the same address - not to mention differences with '.' placement and other
|
116
|
+
issues.
|
117
|
+
|
118
|
+
`EmailGraph::InteractionGraph` will do this by default using SoundCloud's
|
119
|
+
[Normailize](https://github.com/soundcloud/normailize) gem.
|
120
|
+
|
121
|
+
You can also pass your own email processing block on instantiation for the
|
122
|
+
entire graph, or when calling `#add_message`.
|
123
|
+
|
124
|
+
## Fetching emails
|
125
|
+
|
126
|
+
A fetcher for Gmail is included for convenience.
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
g = EmailGraph::InteractionGraph.new
|
130
|
+
|
131
|
+
email = "XXX"
|
132
|
+
# You'll need an OAuth2 access token with Gmail permissions. One way to get one
|
133
|
+
# is to use the Google OAuth Playground (https://developers.google.com/oauthplayground/)
|
134
|
+
# and under "Gmail API v1" authorize "https://mail.google.com/".
|
135
|
+
access_token = "XXX"
|
136
|
+
|
137
|
+
f = EmailGraph::GmailFetcher::Fetcher.new( email: email,
|
138
|
+
access_token: access_token )
|
139
|
+
|
140
|
+
# This should cover all emails from that account. If no mailbox param is
|
141
|
+
# provided, defaults to Inbox.
|
142
|
+
mailboxes = ['[Gmail]/All Mail', '[Gmail]/Trash']
|
143
|
+
f.each_message(mailboxes: mailboxes){ |m| g.add_message(m) }
|
144
|
+
```
|
145
|
+
|
146
|
+
## Contributing
|
147
|
+
|
148
|
+
1. Fork it ( https://github.com/[my-github-username]/email_graph/fork )
|
149
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
150
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
151
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
152
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/email_graph.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'email_graph/version'
|
5
|
+
|
6
|
+
# Gem packaging metadata for email_graph.
Gem::Specification.new do |spec|
  spec.name          = "email_graph"
  spec.version       = EmailGraph::VERSION
  spec.authors       = ["Ryan Dick"]
  spec.email         = ["rmdick@gmail.com"]
  spec.summary       = %q{Graph data from emails.}
  spec.homepage      = "https://github.com/rymodi/email_graph"
  spec.license       = "MIT"

  # The library uses keyword arguments and double-splat (**) parameters,
  # which require Ruby 2.0 or newer; declare that so older interpreters are
  # rejected at install time instead of failing with a syntax error at load.
  spec.required_ruby_version = ">= 2.0.0"

  # Package exactly what git tracks; executables and test files are derived
  # from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "normailize", '~> 0.0.1'

  # For the GmailFetcher
  spec.add_runtime_dependency "gmail_xoauth", '~> 0.4.1'

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", '~> 3.1.0'
end
|
data/lib/email_graph.rb
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
module EmailGraph
|
2
|
+
|
3
|
+
# Graph with single, directed edges between vertices; loops allowed.
#
# Has these additional specifications:
# - Vertices and edges are hashable objects; edges must respond to #from and
#   #to (DirectedEdge and its subclasses do)
# - Efficient fetching of in-edges in addition to out-edges: every edge is
#   indexed twice, once under its source vertex and once under its target
class DirectedGraph

  def initialize
    # vertex => Set of edges leaving that vertex
    @from_store = {}
    # vertex => Set of edges arriving at that vertex
    @to_store   = {}
  end

  # All vertices.
  #
  # @return [Array] every vertex added directly or through an edge
  def vertices
    @from_store.keys
  end

  # All edges.
  #
  # @return [Set] union of the out-edge sets of every vertex
  def edges
    @from_store.values.each_with_object(Set.new) do |out_edges, all|
      all.merge(out_edges)
    end
  end

  # A specific edge from +v+ to +w+.
  #
  # @return the stored edge, or nil when there is none
  def edge(v, w)
    (@from_store[v] || []).find{ |e| e.to == w }
  end

  # Out-edges from vertex +v+.
  #
  # @return [Set, nil] the stored set of out-edges; nil when +v+ is not a
  #   vertex of this graph
  def edges_from(v)
    @from_store[v]
  end

  # In-edges to vertex +v+.
  #
  # @return [Set, nil] the stored set of in-edges; nil when +v+ is not a
  #   vertex of this graph
  def edges_to(v)
    @to_store[v]
  end

  # Adds a vertex if it doesn't already exist and returns it.
  #
  # @return the vertex +v+ (previously an internal Set leaked out here,
  #   contradicting the documented contract)
  def add_vertex(v)
    @from_store[v] ||= Set.new
    @to_store[v]   ||= Set.new
    v
  end

  # Adds an edge and associated vertices if they don't already
  # exist and returns the edge.
  #
  # @return the edge now stored for that from/to pair: +e+ itself when it was
  #   inserted, otherwise the edge that was already present
  def add_edge(e)
    add_vertex(e.from)
    add_vertex(e.to)

    # Set#add? returns nil when an equal edge is already stored
    if @from_store[e.from].add?(e)
      @to_store[e.to].add(e)
      e
    else
      edge(e.from, e.to)
    end
  end

  # Yields each edge and its inverse to the provided block.
  #
  # Option to provide edges; default is all edges.
  #
  # A pair set is yielded only once (not again in reverse). The inverse is
  # nil when the graph holds no edge in the opposite direction.
  #
  # @return [Enumerator] when called without a block
  def with_each_edge_and_inverse(edges=nil, &block)
    return enum_for(:with_each_edge_and_inverse, edges) unless block

    edges ||= self.edges

    # Unordered endpoint pairs that have already been handed to the block
    yielded_pairs = Set.new
    edges.each do |e|
      pair = Set.new([e.from, e.to])
      next if yielded_pairs.include?(pair)

      block.call(e, edge(e.to, e.from))
      yielded_pairs << pair
    end
  end

  # Converts to an instance of EmailGraph::UndirectedGraph.
  #
  # The optional +edge_factory+ block should take a pair of an edge and its
  # inverse (if it exists), and return either an undirected edge-ish or if there
  # should be no edge between the two vertices, then return nil. If
  # no block is passed, an +UndirectedEdge+ will be created if both the edge and
  # its inverse exist.
  #
  # Only adds vertices that have edges, i.e., no isolated vertices in result.
  def to_undirected(&edge_factory)
    edge_factory ||= Proc.new{ |e1, e2| UndirectedEdge.new(e1.from, e1.to) if e1 && e2 }

    edges = Set.new
    with_each_edge_and_inverse do |e, e_inverse|
      new_edge = edge_factory.call(e, e_inverse)
      edges.add(new_edge) if new_edge
    end

    UndirectedGraph.new.tap do |g|
      edges.each{ |e| g.add_edge(e) }
    end
  end

end
|
97
|
+
|
98
|
+
# Edge pointing from one vertex to another.
#
# Two edges are equal (and hash alike) when their +from+ and +to+ vertices
# match, regardless of class, so a subclass instance can stand in for a plain
# DirectedEdge inside a Set or as a Hash key.
class DirectedEdge
  attr_reader :from
  attr_reader :to

  # @raise [ArgumentError] when either vertex is nil or false
  def initialize(from, to)
    raise ArgumentError, "Vertices cannot be falsy" unless from && to
    @from = from
    @to   = to
  end

  # Order-sensitive hash. XOR-ing the two vertex hashes would make an edge
  # collide with its inverse and would hash every loop to 0.
  def hash
    [from, to].hash
  end

  # @return [Boolean] true when +other+ exposes the same from/to vertices;
  #   false (rather than raising) for objects that are not edge-like
  def ==(other)
    other.respond_to?(:from) && other.respond_to?(:to) &&
      from == other.from && to == other.to
  end
  alias eql? ==

  def to_s
    "(#{from}-#{to})"
  end
end
|
121
|
+
|
122
|
+
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'net/imap'
|
3
|
+
require 'gmail_xoauth'
|
4
|
+
|
5
|
+
module EmailGraph
|
6
|
+
module GmailFetcher
|
7
|
+
|
8
|
+
# Streams message envelopes from a Gmail account over IMAP, authenticating
# with the XOAUTH2 mechanism (an email address plus an OAuth2 access token).
class Fetcher
  attr_accessor :batch_size, :email, :access_token

  # @param email [String] account to authenticate as
  # @param access_token [String] OAuth2 access token for that account
  def initialize(email: nil, access_token: nil)
    @email        = email
    @access_token = access_token
    # Default number of envelopes requested per IMAP FETCH
    @batch_size   = 500
  end

  # @param mailboxes [Array<String>] mailbox names to count
  # @return [Integer] sum of the MESSAGES status of each mailbox
  def count_messages(mailboxes: ['INBOX'])
    mailboxes.inject(0){ |r, m| r + imap.status(m, ['MESSAGES'])['MESSAGES'] }
  end

  # Yields a Message built from every envelope in the given mailboxes.
  #
  # @param mailboxes [Array<String>]
  # @param batch_size [Integer, nil] overrides #batch_size when given
  def each_message(mailboxes: ['INBOX'], batch_size: nil)
    each_envelope(mailboxes: mailboxes, batch_size: batch_size) do |e|
      m = Message.from_net_imap_envelope(e)
      yield m if block_given?
    end
  end

  # Yields the raw fetch result of every message in the given mailboxes,
  # requesting them in slices of +batch_size+ sequence numbers and printing a
  # progress line to stdout after each slice.
  #
  # @param mailboxes [Array<String>]
  # @param batch_size [Integer, nil] overrides #batch_size when given
  def each_envelope(mailboxes: ['INBOX'], batch_size: nil)
    mailboxes.each do |mailbox|
      # Needed before fetching
      imap.examine(mailbox)

      batch_size ||= @batch_size
      limit = count_messages(mailboxes: [mailbox])

      (1..limit).each_slice(batch_size) do |range|
        # fetch may return nil; treat that as an empty batch
        envelope_batch = imap.fetch(range, 'ENVELOPE') || []
        envelope_batch.each{ |e| yield e if block_given? }
        puts "Fetched #{range.last}/#{limit}"
      end
    end
  end

  # Opens and authenticates a new IMAP connection to Gmail.
  #
  # The connection is made with `ssl: true`, which applies OpenSSL's default
  # context parameters and therefore verifies the server certificate. The
  # previous positional call passed `verify = false`, which would have let a
  # man-in-the-middle capture the OAuth access token.
  def imap_connect
    Net::IMAP.new('imap.gmail.com', port: 993, ssl: true).tap do |imap|
      imap.authenticate('XOAUTH2', email, access_token)
    end
  end

  # Memoized connection; established on first use.
  def imap
    @imap ||= imap_connect
  end

end
|
55
|
+
|
56
|
+
# Plain message record: address lists for each header field plus the date.
Message = Struct.new(:from, :to, :cc, :bcc, :date) do

  # Keyword-style constructor; each key must name a struct member.
  def initialize(**kwargs)
    kwargs.each_pair{ |member, value| self[member] = value }
  end

  # Builds a Message from the 'ENVELOPE' attribute of an IMAP fetch result.
  #
  # @param e [+Net::IMAP::Envelope+]
  # @return [Message]
  def self.from_net_imap_envelope(e)
    # A missing address field becomes an empty list
    addresses_by_field = [:from, :to, :cc, :bcc].each_with_object({}) do |field, acc|
      raw_addresses = e.attr['ENVELOPE'].send(field) || []
      acc[field] = raw_addresses.map{ |a| Address.from_net_imap_address(a) }
    end

    date_raw = e.attr['ENVELOPE'].date
    date =
      begin
        Time.parse(date_raw) if date_raw
      rescue ArgumentError
        # Observed cases:
        #   - date_raw == "{DATE}"
        #   - Time.parse raises 'ArgumentError: argument out of range'
        nil
      end

    new(**addresses_by_field, date: date)
  end
end
|
85
|
+
|
86
|
+
# Plain address record: a display name and an email string.
Address = Struct.new(:name, :email) do

  # Keyword-style constructor; each key must name a struct member.
  def initialize(**kwargs)
    kwargs.each_pair{ |member, value| self[member] = value }
  end

  # Joins the IMAP address's mailbox and host with "@" to form the email.
  #
  # @param a [Net::IMAP::Address]
  # @return [Address]
  def self.from_net_imap_address(a)
    display_name = a.name
    mailbox      = a.mailbox
    host         = a.host

    new(name: display_name, email: "#{mailbox}@#{host}")
  end

end
|
100
|
+
|
101
|
+
end
|
102
|
+
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
module EmailGraph
|
2
|
+
|
3
|
+
# Directed graph of identities and their relationships, created by parsing
# messages. Vertices are (processed) email strings; edges are
# InteractionRelationship objects carrying the dates of each interaction.
class InteractionGraph < DirectedGraph

  # @param messages [Array<#from, #to, #cc, #bcc, #date>] optional
  #   message-like objects. See {#add_message} for specification.
  # @param email_processor [Proc] block that should return a processed email
  #   when passed an unprocessed one. Defaults to #default_email_processor; pass
  #   +Proc.new{ |e| e }+ for no processing.
  def initialize(messages: [], email_processor: nil)
    super()
    @email_processor = email_processor || default_email_processor

    messages.each{ |m| add_message(m) }
  end

  # Adds a message to the graph: one interaction from the sender to every
  # distinct recipient found in the to, cc and bcc fields.
  #
  # @param m [#from, #to, #cc, #bcc, #date] message-like object. Field methods
  #   should return an array of objects that respond to #name and #email;
  #   #date should return an instance of +Time+.
  # @param email_processor [Proc] block that should return a processed email
  #   when passed an unprocessed one. Pass +Proc.new{ |e| e }+ for no processing.
  # @return m param
  def add_message(m, email_processor: nil)
    email_processor ||= @email_processor

    # Processed sender. Declared outside the loops so it is computed once per
    # message, and only if the message has at least one recipient.
    from = nil
    # Processed recipients already linked for this message
    seen = Set.new

    # Fields are in prioritized order (e.g., if in 'to', don't process again in 'cc')
    [:to, :cc, :bcc].each do |field|
      (m.send(field) || []).each do |a|
        to = email_processor.call(a.email)
        next unless seen.add?(to)

        from ||= email_processor.call(m.from.first.email)
        add_interaction(from, to, m.date)
      end
    end

    m
  end

  # Converts graph into an undirected one, where edges are mutual relationships.
  #
  # The optional +edge_filter+ is used for determining the mutual relationship
  # threshold based on the edge pair. It should take an edge and its inverse as
  # arguments and return true if a +MutualRelationship+ should be created. The
  # inverse is nil when no edge exists in the opposite direction.
  #
  # The resulting relationship carries the interactions of both directions.
  def to_mutual_graph(&edge_filter)
    edge_filter ||= Proc.new{ |e, e_inverse| e && e_inverse }

    edge_factory = Proc.new do |e, e_inverse|
      next unless edge_filter.call(e, e_inverse)

      MutualRelationship.new(e.from, e.to).tap do |r|
        r.interactions.concat(e.interactions)
        # A custom filter may accept a one-way edge (nil inverse), and a loop
        # is its own inverse: in both cases there is nothing more to merge.
        if e_inverse && !e_inverse.equal?(e)
          r.interactions.concat(e_inverse.interactions)
        end
      end
    end

    to_undirected(&edge_factory)
  end

  # @return [Proc] processor that normalizes an address with Normailize and
  #   falls back to downcasing when Normailize rejects it
  def default_email_processor
    Proc.new do |email|
      begin
        Normailize::EmailAddress.new(email).normalized_address
      rescue ArgumentError
        # Chokes on emails like "twitter-confirmation-blah=gmail.com@postmaster.twitter.com"
        email.downcase
      end
    end
  end

  private

  # Records +date+ on the from->to relationship, creating it if needed.
  def add_interaction(from, to, date)
    r = add_edge(InteractionRelationship.new(from, to))
    r.add_interaction(date)
  end

end
|
87
|
+
|
88
|
+
# Directed edge that also keeps the list of interaction dates recorded on it.
class InteractionRelationship < DirectedEdge
  attr_reader :interactions

  def initialize(from, to)
    super
    # Dates in the order they were recorded
    @interactions = []
  end

  # Appends +date+ and returns the interactions array.
  def add_interaction(date)
    interactions.push(date)
  end
end
|
100
|
+
|
101
|
+
# Undirected edge that also keeps the list of interaction dates recorded on it.
class MutualRelationship < UndirectedEdge
  attr_reader :interactions

  def initialize(v, w)
    super
    # Dates in the order they were recorded
    @interactions = []
  end

  # Appends +date+ and returns the interactions array.
  def add_interaction(date)
    interactions.push(date)
  end
end
|
113
|
+
|
114
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module EmailGraph
|
2
|
+
|
3
|
+
# Graph with single, undirected edges between vertices.
#
# Edges must respond to #vertices with a Set of their endpoints (as
# UndirectedEdge does). Each edge is indexed under every vertex it touches.
class UndirectedGraph

  def initialize
    # vertex => Set of edges touching that vertex
    @store = {}
  end

  # All vertices.
  #
  # @return [Array] every vertex added directly or through an edge
  def vertices
    @store.keys
  end

  # All edges.
  #
  # @return [Set] union of the edge sets of every vertex
  def edges
    @store.values.each_with_object(Set.new) do |touching, all|
      all.merge(touching)
    end
  end

  # A specific edge between +v+ and +w+ (order ignored).
  #
  # The endpoints are compared as a whole, so asking for a loop (v == w) only
  # matches an actual loop edge, not just any edge touching +v+.
  #
  # @return the stored edge, or nil when there is none
  def edge(v, w)
    endpoints = Set.new([v, w])
    (@store[v] || []).find{ |e| e.vertices.to_set == endpoints }
  end

  # Edges involving a vertex +v+.
  #
  # @return [Set, nil] the stored set of edges; nil when +v+ is not a vertex
  #   of this graph
  def edges_with(v)
    @store[v]
  end

  # Adds a vertex if it doesn't already exist and returns it.
  #
  # @return the vertex +v+
  def add_vertex(v)
    @store[v] ||= Set.new
    v
  end

  # Adds an edge and associated vertices if they don't already
  # exist and returns the edge.
  #
  # @return the edge now stored between those vertices: +e+ itself when it
  #   was inserted, otherwise the edge that was already present
  def add_edge(e)
    # For a loop the endpoint set has a single member, so first == last
    endpoints = e.vertices.to_a
    existing  = edge(endpoints.first, endpoints.last)
    return existing if existing

    e.vertices.each do |vertex|
      add_vertex(vertex)
      @store[vertex].add(e)
    end
    e
  end

end
|
42
|
+
|
43
|
+
# Edge joining two vertices with no direction.
#
# The endpoints are held in a Set, so (v, w) and (w, v) are the same edge and
# a loop (v, v) has a single-member endpoint set.
class UndirectedEdge
  attr_reader :vertices

  # @raise [ArgumentError] when either vertex is nil or false
  def initialize(v, w)
    raise ArgumentError, "Vertices cannot be falsy" unless v && w
    @vertices = Set.new([v, w])
  end

  def hash
    @vertices.hash
  end

  # @return [Boolean] true when +other+ exposes the same endpoint set;
  #   false (rather than raising) for objects that are not edge-like
  def ==(other)
    other.respond_to?(:vertices) && @vertices == other.vertices
  end
  alias eql? ==

  # Renders as "(a-b)"; a loop repeats its only vertex, e.g. "(a-a)".
  def to_s
    a = @vertices.to_a
    "(#{a.first}-#{a.last})"
  end

end
|
66
|
+
|
67
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe EmailGraph::DirectedGraph do
  # Fixture: 1 <-> 2, 2 -> 3, plus the isolated vertex 4
  let(:edge_pairs) { [[1, 2], [2, 1], [2, 3]] }
  let(:edges) do
    edge_pairs.map { |from, to| EmailGraph::DirectedEdge.new(from, to) }
  end
  let(:g) do
    graph = EmailGraph::DirectedGraph.new
    edges.each { |edge| graph.add_edge(edge) }
    graph.add_vertex(4)
    graph
  end

  it 'returns all vertices' do
    expect(g.vertices).to contain_exactly(1, 2, 3, 4)
  end

  it 'returns all edges' do
    expect(g.edges).to match_array(edges)
  end

  it 'returns a specific edge' do
    wanted = EmailGraph::DirectedEdge.new(1, 2)

    expect(g.edge(1, 2)).to eq(wanted)
  end

  it 'returns edges from a vertex' do
    targets = g.edges_from(2).map { |edge| edge.to }

    expect(targets).to contain_exactly(1, 3)
  end

  it 'returns edges to a vertex' do
    sources = g.edges_to(2).map { |edge| edge.from }

    expect(sources).to contain_exactly(1)
  end

  it 'returns existing edge when trying to add one with the same vertices' do
    # Same endpoints as an edge already in the fixture, but a different class
    duplicate = NewDirectedEdgeType.new(2, 3)
    returned  = g.add_edge(duplicate)

    expect(returned).to be_instance_of(EmailGraph::DirectedEdge)
  end

  it 'iterates through each edge and its inverse' do
    mutual  = [g.edge(1, 2), g.edge(2, 1)]
    one_way = [g.edge(2, 3), nil]

    expect { |probe| g.with_each_edge_and_inverse(&probe) }
      .to yield_successive_args(mutual, one_way)
  end

  describe '#to_undirected' do

    it 'returns with the correct edges and vertices using default edge_factory' do
      undirected = g.to_undirected

      expect(undirected.edges).to contain_exactly(EmailGraph::UndirectedEdge.new(1, 2))
      expect(undirected.vertices).to contain_exactly(1, 2)
    end

    it 'returns with the correct edges when using a provided edge_factory' do
      # Accepts a pair as soon as one direction exists
      at_least_one_way_edge_factory = Proc.new do |e1, e2|
        EmailGraph::UndirectedEdge.new(e1.from, e1.to) if e1
      end

      undirected = g.to_undirected(&at_least_one_way_edge_factory)

      expect(undirected.edges).to contain_exactly(
        EmailGraph::UndirectedEdge.new(1, 2),
        EmailGraph::UndirectedEdge.new(2, 3)
      )
      expect(undirected.vertices).to contain_exactly(1, 2, 3)
    end

  end

end

describe EmailGraph::DirectedEdge do

  it 'raises when a vertex is nil' do
    expect { EmailGraph::DirectedEdge.new(nil, 1) }.to raise_exception(ArgumentError)
  end

  it 'converts into a string' do
    rendered = EmailGraph::DirectedEdge.new("v", "w").to_s

    expect(rendered).to eq("(v-w)")
  end

end

# Subclass used to check that an already-stored edge is returned as is
class NewDirectedEdgeType < EmailGraph::DirectedEdge; end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
describe EmailGraph::InteractionGraph do
  let(:a0)  { OpenStruct.new(name: "a0", email: "a0@example.com") }
  let(:a1)  { OpenStruct.new(name: "a1", email: "a1@example.com") }
  # Single message from a0 to a1
  let(:msg) { OpenStruct.new(from: [a0], to: [a1], date: Time.now) }
  let(:g)   { EmailGraph::InteractionGraph.new }

  describe '#add_message' do

    it 'adds vertices (identities)' do
      g.add_message(msg)

      expect(g.vertices).to contain_exactly(a0.email, a1.email)
    end

    it 'adds edges (relationships)' do
      g.add_message(msg)

      relationship = EmailGraph::InteractionRelationship.new(a0.email, a1.email)
      expect(g.edges).to contain_exactly(relationship)
    end

    it 'adds interactions to relationships' do
      g.add_message(msg)

      recorded = g.edge(a0.email, a1.email).interactions
      expect(recorded).to contain_exactly(msg.date)
    end

    it 'adds interactions ignoring duplicates' do
      # a1 appears in both 'to' and 'cc'
      msg_with_dups = OpenStruct.new(from: [a0], to: [a1], cc: [a1], date: Time.now)

      g.add_message(msg_with_dups)

      recorded = g.edge(a0.email, a1.email).interactions
      expect(recorded).to contain_exactly(msg_with_dups.date)
    end

    it 'raises when message addresses have nil emails' do
      nil_email          = OpenStruct.new(name: "Nil Email", email: nil)
      msg_with_nil_email = OpenStruct.new(from: [a0], to: [nil_email], date: Time.now)

      expect { g.add_message(msg_with_nil_email) }.to raise_exception(ArgumentError)
    end

    it 'normalizes emails if no processor is provided' do
      a_norm     = OpenStruct.new(name: "Norm", email: "test@gmail.com")
      a_not_norm = OpenStruct.new(name: "Not Norm", email: "tEs.t+blah@gmail.com")

      message = OpenStruct.new(from: [a_norm], to: [a_not_norm], date: Time.now)
      g.add_message(message)

      # Both addresses normalize to the same identity, giving a loop
      loop_edge = EmailGraph::InteractionRelationship.new('test@gmail.com', 'test@gmail.com')
      expect(g.edges).to contain_exactly(loop_edge)
    end

    it 'processes emails if a processor is provided' do
      processor = Proc.new { |e| "EMAIL@EMAIL.COM" }

      g.add_message(msg, email_processor: processor)

      expect(g.vertices).to contain_exactly("EMAIL@EMAIL.COM")
    end

  end

  describe '#to_mutual_graph' do

    it 'returns with the correct edges and vertices using default filter' do
      reply = OpenStruct.new(from: [a1], to: [a0], date: Time.now)
      g.add_message(msg)
      g.add_message(reply)

      mutual_graph = g.to_mutual_graph

      expect(mutual_graph.edges)
        .to contain_exactly(EmailGraph::MutualRelationship.new(a0.email, a1.email))
      expect(mutual_graph.vertices).to contain_exactly(a0.email, a1.email)
      expect(mutual_graph.edge(a0.email, a1.email).interactions.size).to eq(2)
    end

  end

end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require_relative '../lib/email_graph.rb'
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe EmailGraph::UndirectedGraph do
  # Fixture: 1 - 2 - 3, plus the isolated vertex 4
  let(:edge_pairs) { [[1, 2], [2, 3]] }
  let(:edges) do
    edge_pairs.map { |v, w| EmailGraph::UndirectedEdge.new(v, w) }
  end
  let(:g) do
    graph = EmailGraph::UndirectedGraph.new
    edges.each { |edge| graph.add_edge(edge) }
    graph.add_vertex(4)
    graph
  end

  it 'returns all vertices' do
    expect(g.vertices).to contain_exactly(1, 2, 3, 4)
  end

  it 'returns all edges' do
    expect(g.edges).to match_array(edges)
  end

  it 'returns a specific edge, vertex order ignored' do
    wanted = EmailGraph::UndirectedEdge.new(1, 2)

    expect(g.edge(2, 1)).to eq(wanted)
  end

  it 'returns edges involving a vertex' do
    touching_three = g.edges_with(3)

    expect(touching_three).to contain_exactly(EmailGraph::UndirectedEdge.new(2, 3))
  end

  it 'returns an existing edge if available when adding one' do
    # Same endpoints as an edge already in the fixture, but a different class
    duplicate = NewUndirectedEdgeType.new(2, 3)
    returned  = g.add_edge(duplicate)

    expect(returned).to be_instance_of(EmailGraph::UndirectedEdge)
  end

end

describe EmailGraph::UndirectedEdge do

  it 'raises when a vertex is nil' do
    expect { EmailGraph::UndirectedEdge.new(nil, 1) }.to raise_exception(ArgumentError)
  end

  it 'converts into a string' do
    rendered = EmailGraph::UndirectedEdge.new("v", "w").to_s

    # Either endpoint order is accepted
    expect(rendered).to match(/^\((v-w|w-v)\)$/)
  end

end

# Subclass used to check that an already-stored edge is returned as is
class NewUndirectedEdgeType < EmailGraph::UndirectedEdge; end
|
metadata
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: email_graph
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ryan Dick
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: normailize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.0.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.0.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: gmail_xoauth
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.4.1
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.4.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.7'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.7'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 3.1.0
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 3.1.0
|
83
|
+
description:
|
84
|
+
email:
|
85
|
+
- rmdick@gmail.com
|
86
|
+
executables: []
|
87
|
+
extensions: []
|
88
|
+
extra_rdoc_files: []
|
89
|
+
files:
|
90
|
+
- ".gitignore"
|
91
|
+
- Gemfile
|
92
|
+
- LICENSE.txt
|
93
|
+
- README.md
|
94
|
+
- Rakefile
|
95
|
+
- email_graph.gemspec
|
96
|
+
- lib/email_graph.rb
|
97
|
+
- lib/email_graph/directed_graph.rb
|
98
|
+
- lib/email_graph/gmail_fetcher.rb
|
99
|
+
- lib/email_graph/interaction_graph.rb
|
100
|
+
- lib/email_graph/undirected_graph.rb
|
101
|
+
- lib/email_graph/version.rb
|
102
|
+
- spec/directed_graph_spec.rb
|
103
|
+
- spec/interaction_graph_spec.rb
|
104
|
+
- spec/spec_helper.rb
|
105
|
+
- spec/undirected_graph_spec.rb
|
106
|
+
homepage: https://github.com/rymodi/email_graph
|
107
|
+
licenses:
|
108
|
+
- MIT
|
109
|
+
metadata: {}
|
110
|
+
post_install_message:
|
111
|
+
rdoc_options: []
|
112
|
+
require_paths:
|
113
|
+
- lib
|
114
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
115
|
+
requirements:
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: '0'
|
119
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
requirements: []
|
125
|
+
rubyforge_project:
|
126
|
+
rubygems_version: 2.2.2
|
127
|
+
signing_key:
|
128
|
+
specification_version: 4
|
129
|
+
summary: Graph data from emails.
|
130
|
+
test_files:
|
131
|
+
- spec/directed_graph_spec.rb
|
132
|
+
- spec/interaction_graph_spec.rb
|
133
|
+
- spec/spec_helper.rb
|
134
|
+
- spec/undirected_graph_spec.rb
|
135
|
+
has_rdoc:
|