email_graph 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +152 -0
- data/Rakefile +2 -0
- data/email_graph.gemspec +28 -0
- data/lib/email_graph.rb +7 -0
- data/lib/email_graph/directed_graph.rb +122 -0
- data/lib/email_graph/gmail_fetcher.rb +102 -0
- data/lib/email_graph/interaction_graph.rb +114 -0
- data/lib/email_graph/undirected_graph.rb +67 -0
- data/lib/email_graph/version.rb +3 -0
- data/spec/directed_graph_spec.rb +90 -0
- data/spec/interaction_graph_spec.rb +90 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/undirected_graph_spec.rb +53 -0
- metadata +135 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 60c190e3910f79ec5129c9cca0b0227b87390e68
|
4
|
+
data.tar.gz: 2fe939d783d01402f1acecd9fac40aeda554d815
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 53e992b88cff05752d47163afb3d0f0d26c71eb788f3f4c75e67f722a8ff3a325a1b2016e1ac17644571d79a98b8089a2be64cfeae45d862e1e3e7daf27a4232
|
7
|
+
data.tar.gz: 97e5b6f60728506c480982ea02de8a921012daef7a1100632c90f5d0fba9ad1e06a25cf960319b1bfa678dcacf9131889f50d5063ac25f9132edef622418d19c
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Ryan Dick
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
# EmailGraph
|
2
|
+
|
3
|
+
Build and analyze graph data structures from email history.
|
4
|
+
|
5
|
+
Focus is on identities and interactions between them, for example:
|
6
|
+
* Who do I email with the most?
|
7
|
+
* Who are my strong contacts, as identified by some two-way interaction
|
8
|
+
threshold function?
|
9
|
+
* What is the distribution of my email interactions with a given person over
|
10
|
+
time?
|
11
|
+
|
12
|
+
Subject lines and message bodies are currently ignored for simplification.
|
13
|
+
|
14
|
+
## Installation
|
15
|
+
|
16
|
+
Add this line to your application's Gemfile:
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
gem 'email_graph'
|
20
|
+
```
|
21
|
+
|
22
|
+
And then execute:
|
23
|
+
|
24
|
+
$ bundle
|
25
|
+
|
26
|
+
Or install it yourself as:
|
27
|
+
|
28
|
+
$ gem install email_graph
|
29
|
+
|
30
|
+
## Graph types
|
31
|
+
|
32
|
+
There are two types of graphs to be built from email data.
|
33
|
+
|
34
|
+
### 1. Interaction graph
|
35
|
+
|
36
|
+
Class: `EmailGraph::InteractionGraph`
|
37
|
+
|
38
|
+
This is a directed graph where each vertex is an email address and each edge is
|
39
|
+
an instance of `EmailGraph::InteractionRelationship` - a directed interaction
|
40
|
+
history between two emails.
|
41
|
+
|
42
|
+
The graph has these properties:
|
43
|
+
* Implements common graph methods (e.g., `#vertices`, `#edges`, etc)
|
44
|
+
* Efficient fetching of vertices' in-edges (not always a default of graph
|
45
|
+
structures)
|
46
|
+
* Loops allowed
|
47
|
+
|
48
|
+
Given a message, an interaction is created from the sender to every address in
|
49
|
+
the `to`, `cc`, and `bcc` fields - no distinction is made among these three.
|
50
|
+
|
51
|
+
`EmailGraph::InteractionRelationship` objects have an `interactions` attribute,
|
52
|
+
which holds an array of the `Time` objects of the interactions.
|
53
|
+
|
54
|
+
Example:
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
# Assuming you have an array 'messages' of message-like objects (see below for
|
58
|
+
# how to use the Gmail fetcher)
|
59
|
+
g = EmailGraph::InteractionGraph.new
|
60
|
+
messages.each{ |m| g.add_message(m) }
|
61
|
+
|
62
|
+
# ...or...
|
63
|
+
|
64
|
+
g = EmailGraph::InteractionGraph.new(messages: messages)
|
65
|
+
|
66
|
+
# For example, see a sorted list of your email contacts by emails sent
|
67
|
+
g.edges_from("your@emailhere.com")
|
68
|
+
.sort_by{ |e| -e.interactions.size }
|
69
|
+
.map{ |e| [e.to, e.interactions.size] }
|
70
|
+
```
|
71
|
+
|
72
|
+
### 2. Mutual relationship graph
|
73
|
+
|
74
|
+
Class: `EmailGraph::UndirectedGraph` (just uses the abstract class)
|
75
|
+
|
76
|
+
This is an undirected graph where each vertex is also an email address. This
|
77
|
+
time, the edges are instances of `EmailGraph::MutualRelationship` - an
|
78
|
+
undirected edge that similarly includes an interaction history (though an
|
79
|
+
undirected one).
|
80
|
+
|
81
|
+
This graph is created from an `EmailGraph::InteractionGraph` by creating
|
82
|
+
undirected edges from pairs of directed edge inverses. Optionally, a filter can
|
83
|
+
be applied during this process to determine whether an undirected edge is
|
84
|
+
created for a given pair of directed edges.
|
85
|
+
|
86
|
+
```ruby
|
87
|
+
g = EmailGraph::InteractionGraph.new(messages: messages)
|
88
|
+
|
89
|
+
# This creates the graph using the default filter, which is that an edge has to
|
90
|
+
# have an inverse in order to create a new undirected edge.
|
91
|
+
mg = g.to_mutual_graph
|
92
|
+
|
93
|
+
# Alternatively, you can specify a custom filter. For example, this replicates
|
94
|
+
# the one used by A. Chapanond et al. in their analysis of emails* from the Enron
|
95
|
+
# case data set
|
96
|
+
filter = Proc.new do |e, e_inverse|
|
97
|
+
if e && e_inverse
|
98
|
+
counts = [e.interactions.size, e_inverse.interactions.size]
|
99
|
+
counts.all?{ |c| c >= 6 } && counts.inject(:+) >= 30
|
100
|
+
else
|
101
|
+
false
|
102
|
+
end
|
103
|
+
end
|
104
|
+
mg = g.to_mutual_graph(&filter)
|
105
|
+
```
|
106
|
+
|
107
|
+
\*Chapanond, Anurat, Mukkai S. Krishnamoorthy, and Bülent Yener. "Graph theoretic
|
108
|
+
and spectral analysis of Enron email data." Computational & Mathematical
|
109
|
+
Organization Theory 11.3 (2005): 265-281.
|
110
|
+
|
111
|
+
## Email normalization
|
112
|
+
|
113
|
+
You'll likely want to normalize email addresses before adding them to a graph.
|
114
|
+
Otherwise, you'll end up with separate vertices for different capitalizations of
|
115
|
+
the same address - not to mention differences with '.' placement and other
|
116
|
+
issues.
|
117
|
+
|
118
|
+
`EmailGraph::InteractionGraph` will do this by default using SoundCloud's
|
119
|
+
[Normailize](https://github.com/soundcloud/normailize) gem.
|
120
|
+
|
121
|
+
You can also pass your own email processing block on instantiation for the
|
122
|
+
entire graph, or when calling `#add_message`.
|
123
|
+
|
124
|
+
## Fetching emails
|
125
|
+
|
126
|
+
A fetcher for Gmail is included for convenience.
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
g = EmailGraph::InteractionGraph.new
|
130
|
+
|
131
|
+
email = "XXX"
|
132
|
+
# You'll need an OAuth2 access token with Gmail permissions. One way to get one
|
133
|
+
# is to use the Google OAuth Playground (https://developers.google.com/oauthplayground/)
|
134
|
+
# and under "Gmail API v1" authorize "https://mail.google.com/".
|
135
|
+
access_token = "XXX"
|
136
|
+
|
137
|
+
f = EmailGraph::GmailFetcher::Fetcher.new( email: email,
|
138
|
+
access_token: access_token )
|
139
|
+
|
140
|
+
# This should cover all emails from that account. If no mailbox param is
|
141
|
+
# provided, defaults to Inbox.
|
142
|
+
mailboxes = ['[Gmail]/All Mail', '[Gmail]/Trash']
|
143
|
+
f.each_message(mailboxes: mailboxes){ |m| g.add_message(m) }
|
144
|
+
```
|
145
|
+
|
146
|
+
## Contributing
|
147
|
+
|
148
|
+
1. Fork it ( https://github.com/[my-github-username]/email_graph/fork )
|
149
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
150
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
151
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
152
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/email_graph.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'email_graph/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "email_graph"
|
8
|
+
spec.version = EmailGraph::VERSION
|
9
|
+
spec.authors = ["Ryan Dick"]
|
10
|
+
spec.email = ["rmdick@gmail.com"]
|
11
|
+
spec.summary = %q{Graph data from emails.}
|
12
|
+
spec.homepage = "https://github.com/rymodi/email_graph"
|
13
|
+
spec.license = "MIT"
|
14
|
+
|
15
|
+
spec.files = `git ls-files -z`.split("\x0")
|
16
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
17
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
|
+
spec.require_paths = ["lib"]
|
19
|
+
|
20
|
+
spec.add_runtime_dependency "normailize", '~> 0.0.1'
|
21
|
+
|
22
|
+
# For the GmailFetcher
|
23
|
+
spec.add_runtime_dependency "gmail_xoauth", '~> 0.4.1'
|
24
|
+
|
25
|
+
spec.add_development_dependency "bundler", "~> 1.7"
|
26
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
27
|
+
spec.add_development_dependency "rspec", '~> 3.1.0'
|
28
|
+
end
|
data/lib/email_graph.rb
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
module EmailGraph
|
2
|
+
|
3
|
+
# Graph with single, directed edges between vertices; loops allowed.
#
# Has these additional specifications:
#   - Vertices and edges are hashable objects; edges must respond to #from
#     and #to (e.g., by inheriting from DirectedEdge)
#   - Efficient fetching of in-edges in addition to out-edges: every edge is
#     indexed under both its source vertex and its target vertex
class DirectedGraph

  def initialize
    @from_store = {} # vertex => Set of out-edges
    @to_store   = {} # vertex => Set of in-edges
  end

  # All vertices
  #
  # @return [Array] every vertex added directly or through an edge
  def vertices
    @from_store.keys
  end

  # All edges
  #
  # @return [Set] a new set holding every edge of the graph
  def edges
    # Every edge lives in exactly one out-edge set, so merging them is enough.
    @from_store.each_value.with_object(Set.new) do |out_edges, all|
      all.merge(out_edges)
    end
  end

  # A specific edge from +v+ to +w+
  #
  # @return [Object, nil] the edge, or nil if there is none
  def edge(v, w)
    (@from_store[v] || []).find{ |e| e.to == w }
  end

  # Out-edges from vertex +v+
  #
  # @return [Set, nil] nil if +v+ is not a vertex of the graph
  def edges_from(v)
    @from_store[v]
  end

  # In-edges to vertex +v+
  #
  # @return [Set, nil] nil if +v+ is not a vertex of the graph
  def edges_to(v)
    @to_store[v]
  end

  # Adds a vertex if it doesn't already exist.
  #
  # @return [Set] the in-edge set stored for +v+ (not the vertex itself)
  def add_vertex(v)
    @from_store[v] ||= Set.new
    @to_store[v]   ||= Set.new
  end

  # Adds an edge and associated vertices if they don't already
  # exist and returns the edge.
  #
  # @return [Object] +e+ if it was added, otherwise the edge already stored
  #   for the same (from, to) pair
  def add_edge(e)
    add_vertex(e.from)
    add_vertex(e.to)

    # Set#add? returns nil when an equal edge is already present.
    if @from_store[e.from].add?(e)
      @to_store[e.to].add(e)
      e
    else
      edge(e.from, e.to)
    end
  end

  # Yields each edge and its inverse (nil if there is none) to the provided
  # block.
  #
  # Option to provide edges; default is all edges.
  #
  # A pair set is yielded only once (not again in reverse).
  #
  # @return [Enumerator] if no block is given; otherwise the iterated edges
  def with_each_edge_and_inverse(edges=nil, &block)
    # Without a block there is nothing to call; hand back an enumerator
    # instead of failing on nil.
    return to_enum(:with_each_edge_and_inverse, edges) unless block

    edges ||= self.edges

    yielded_pairs = Set.new
    edges.each do |e|
      # Unordered pair, so (v, w) and (w, v) count as the same pair.
      next unless yielded_pairs.add?(Set.new([e.from, e.to]))

      block.call(e, edge(e.to, e.from))
    end
  end

  # Converts to an instance of EmailGraph::UndirectedGraph.
  #
  # The optional +edge_factory+ block should take a pair of an edge and its
  # inverse (if it exists), and return either an undirected edge-ish or if there
  # should be no edge between the two vertices, then return nil. If
  # no block is passed, an +UndirectedEdge+ will be created if both the edge and
  # its inverse exist.
  #
  # Only adds vertices that have edges, i.e., no isolated vertices in result.
  def to_undirected(&edge_factory)
    edge_factory ||= Proc.new{ |e1, e2| UndirectedEdge.new(e1.from, e1.to) if e1 && e2 }

    undirected_edges = Set.new
    with_each_edge_and_inverse do |e, e_inverse|
      new_edge = edge_factory.call(e, e_inverse)
      undirected_edges.add(new_edge) if new_edge
    end

    UndirectedGraph.new.tap do |g|
      undirected_edges.each{ |e| g.add_edge(e) }
    end
  end

end
|
97
|
+
|
98
|
+
# Directed edge from vertex +from+ to vertex +to+.
#
# Two edges are equal (and hash alike) when their endpoints match in the
# same direction, regardless of the concrete edge class.
class DirectedEdge
  attr_reader :from
  attr_reader :to

  # @raise [ArgumentError] if either vertex is nil or false
  def initialize(from, to)
    raise ArgumentError, "Vertices cannot be falsy" unless from && to
    @from = from
    @to   = to
  end

  # Order-sensitive hash: an edge and its inverse land in different buckets,
  # and loops no longer all hash to 0 as they did with +from.hash ^ to.hash+.
  def hash
    [from, to].hash
  end

  # Compares endpoints only; anything that does not expose #from and #to
  # (e.g. nil) is simply unequal rather than raising NoMethodError.
  def ==(other)
    other.respond_to?(:from) && other.respond_to?(:to) &&
      from == other.from && to == other.to
  end
  alias eql? ==

  def to_s
    "(#{from}-#{to})"
  end
end
|
121
|
+
|
122
|
+
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'net/imap'
|
3
|
+
require 'gmail_xoauth'
|
4
|
+
|
5
|
+
module EmailGraph
|
6
|
+
module GmailFetcher
|
7
|
+
|
8
|
+
# Fetches message envelopes from a Gmail account over IMAP, authenticating
# with an OAuth2 access token (XOAUTH2).
class Fetcher
  attr_accessor :batch_size, :email, :access_token

  # @param email [String] address of the account to read
  # @param access_token [String] OAuth2 access token for that account
  def initialize(email: nil, access_token: nil)
    @email        = email
    @access_token = access_token
    @batch_size   = 500 # envelopes requested per IMAP FETCH
  end

  # @param mailboxes [Array<String>] mailbox names
  # @return [Integer] total of the MESSAGES status over all +mailboxes+
  def count_messages(mailboxes: ['INBOX'])
    mailboxes.inject(0){ |r, m| r + imap.status(m, ['MESSAGES'])['MESSAGES'] }
  end

  # Yields a +Message+ for every envelope in +mailboxes+.
  #
  # @param batch_size [Integer, nil] overrides #batch_size when given
  def each_message(mailboxes: ['INBOX'], batch_size: nil)
    each_envelope(mailboxes: mailboxes, batch_size: batch_size) do |e|
      m = Message.from_net_imap_envelope(e)
      yield m if block_given?
    end
  end

  # Yields every raw fetch result (carrying the ENVELOPE attribute) in
  # +mailboxes+, fetching in batches and printing progress after each batch.
  #
  # @param batch_size [Integer, nil] overrides #batch_size when given
  def each_envelope(mailboxes: ['INBOX'], batch_size: nil)
    slice_size = batch_size || @batch_size

    mailboxes.each do |mailbox|
      # Needed before fetching
      imap.examine(mailbox)

      limit = count_messages(mailboxes: [mailbox])

      (1..limit).each_slice(slice_size) do |range|
        envelope_batch = imap.fetch(range, 'ENVELOPE') || []
        envelope_batch.each{ |e| yield e if block_given? }
        puts "Fetched #{range.last}/#{limit}"
      end
    end
  end

  # Opens and authenticates a new IMAP connection to Gmail.
  #
  # Uses the options form of Net::IMAP.new with +ssl: true+. The previous
  # positional call passed +verify = false+, which turned off server
  # certificate verification while sending an OAuth token. With +ssl: true+
  # the OpenSSL defaults apply, which are expected to verify the peer
  # certificate (see the Net::IMAP.new documentation).
  def imap_connect
    Net::IMAP.new('imap.gmail.com', port: 993, ssl: true).tap do |imap|
      imap.authenticate('XOAUTH2', email, access_token)
    end
  end

  # Memoized connection; established on first use.
  def imap
    @imap ||= imap_connect
  end

end
|
55
|
+
|
56
|
+
# Address and date fields of one email message.
#
# +from+, +to+, +cc+ and +bcc+ hold arrays of Address; +date+ holds a Time
# or nil.
Message = Struct.new(:from, :to, :cc, :bcc, :date) do

  # Members are given as keyword arguments; omitted members stay nil.
  def initialize(**kwargs)
    kwargs.each_pair do |member, value|
      self[member] = value
    end
  end

  # @param e [+Net::IMAP::Envelope+]
  # @return [Message]
  def self.from_net_imap_envelope(e)
    envelope = e.attr['ENVELOPE']

    address_lists = [:from, :to, :cc, :bcc].each_with_object({}) do |field, by_field|
      raw_addresses = envelope.send(field) || []
      by_field[field] = raw_addresses.map{ |a| Address.from_net_imap_address(a) }
    end

    raw_date = envelope.date
    parsed_date =
      begin
        Time.parse(raw_date) if raw_date
      rescue ArgumentError
        # Unparseable dates are kept as nil. Observed cases:
        #   - raw date == "{DATE}"
        #   - Time.parse raises 'ArgumentError: argument out of range'
        nil
      end

    new(**address_lists, date: parsed_date)
  end
end
|
85
|
+
|
86
|
+
# Display name and email address of one message participant.
Address = Struct.new(:name, :email) do

  # Members are given as keyword arguments; omitted members stay nil.
  def initialize(**kwargs)
    kwargs.each_pair do |member, value|
      self[member] = value
    end
  end

  # Builds the email as "<mailbox>@<host>" from the IMAP address parts.
  #
  # @param a [Net::IMAP::Address]
  # @return [Address]
  def self.from_net_imap_address(a)
    mailbox = a.mailbox
    host    = a.host
    new(name: a.name, email: "#{mailbox}@#{host}")
  end

end
|
100
|
+
|
101
|
+
end
|
102
|
+
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
module EmailGraph
|
2
|
+
|
3
|
+
# Directed graph of identities and their relationships, created by parsing
# messages.
#
# Vertices are (processed) email addresses; edges are
# +InteractionRelationship+ instances.
class InteractionGraph < DirectedGraph

  # @param messages [Array<#from, #to, #cc, #bcc, #date>] optional
  #   message-like objects. See {#add_message} for specification.
  # @param email_processor [Proc] block that should return a processed email
  #   when passed an unprocessed one. Defaults to #default_email_processor; pass
  #   +Proc.new{ |e| e }+ for no processing.
  def initialize(messages: [], email_processor: nil)
    super()
    @email_processor = email_processor || default_email_processor

    messages.each{ |m| add_message(m) }
  end

  # Adds a message to the graph: one interaction from the sender (the first
  # +from+ address) to every distinct recipient.
  #
  # @param m [#from, #to, #cc, #bcc, #date] message-like object. Field methods
  #   should return an array of objects that respond to #name and #email;
  #   #date should return an instance of +Time+.
  # @param email_processor [Proc] block that should return a processed email
  #   when passed an unprocessed one. Pass +Proc.new{ |e| e }+ for no processing.
  # @return m param
  def add_message(m, email_processor: nil)
    email_processor ||= @email_processor

    # Declared outside the blocks so the sender is processed once per message
    # (and only if there is at least one recipient), not once per recipient.
    from = nil

    # Fields are in prioritized order (e.g., if in 'to', don't process again in 'cc')
    to_emails = []
    [:to, :cc, :bcc].each do |field|
      (m.send(field) || []).each do |a|
        to = email_processor.call(a.email)
        next if to_emails.include?(to)

        from ||= email_processor.call(m.from.first.email)
        add_interaction(from, to, m.date)
        to_emails << to
      end
    end

    m
  end

  # Converts graph into an undirected one, where edges are mutual relationships.
  #
  # The optional +edge_filter+ is used for determining the mutual relationship
  # threshold based on the edge pair. It should take an edge and its inverse
  # (nil if there is none) as arguments and return true if a
  # +MutualRelationship+ should be created.
  def to_mutual_graph(&edge_filter)
    edge_filter ||= Proc.new{ |e, e_inverse| e && e_inverse }

    edge_factory = Proc.new do |e, e_inverse|
      next unless edge_filter.call(e, e_inverse)

      MutualRelationship.new(e.from, e.to).tap do |r|
        r.interactions.concat(e.interactions)
        # A custom filter may accept a one-way edge (no inverse), and a loop
        # is its own inverse; in neither case is there a second history to add.
        if e_inverse && !e_inverse.equal?(e)
          r.interactions.concat(e_inverse.interactions)
        end
      end
    end

    to_undirected(&edge_factory)
  end

  # @return [Proc] processor that normalizes an address with Normailize and
  #   falls back to downcasing when Normailize rejects it
  def default_email_processor
    Proc.new do |email|
      begin
        Normailize::EmailAddress.new(email).normalized_address
      rescue ArgumentError
        # Chokes on emails like "twitter-confirmation-blah=gmail.com@postmaster.twitter.com"
        email.downcase
      end
    end
  end

  private

  # Records +date+ on the relationship from +from+ to +to+, creating the
  # edge (and vertices) if needed.
  def add_interaction(from, to, date)
    r = add_edge(InteractionRelationship.new(from, to))
    r.add_interaction(date)
  end

end
|
87
|
+
|
88
|
+
# Directed edge that also carries the history of interactions from +from+
# to +to+, one entry per interaction.
class InteractionRelationship < DirectedEdge
  attr_reader :interactions

  def initialize(from, to)
    super(from, to)
    @interactions = []
  end

  # Appends +date+ to the history.
  #
  # @return [Array] the updated interactions array
  def add_interaction(date)
    interactions.push(date)
  end
end
|
100
|
+
|
101
|
+
# Undirected edge that also carries the combined interaction history
# between its two vertices.
class MutualRelationship < UndirectedEdge
  attr_reader :interactions

  def initialize(v, w)
    super(v, w)
    @interactions = []
  end

  # Appends +date+ to the history.
  #
  # @return [Array] the updated interactions array
  def add_interaction(date)
    interactions.push(date)
  end
end
|
113
|
+
|
114
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module EmailGraph
|
2
|
+
|
3
|
+
# Graph with single, undirected edges between vertices.
#
# Edges must respond to #vertices with the collection of their (one or two)
# endpoints; every edge is indexed under each of its endpoints.
class UndirectedGraph

  def initialize
    @store = {} # vertex => Set of edges touching it
  end

  # All vertices
  #
  # @return [Array]
  def vertices
    @store.keys
  end

  # All edges
  #
  # @return [Set] a new set holding every edge once
  def edges
    # Each edge is stored under both endpoints; the Set removes the repeats.
    @store.each_value.with_object(Set.new) do |touching, all|
      all.merge(touching)
    end
  end

  # The edge between +v+ and +w+ (argument order is irrelevant)
  #
  # Matches on the exact endpoint set, so +edge(v, v)+ only finds a loop on
  # +v+ rather than any edge touching +v+.
  #
  # @return [Object, nil] the edge, or nil if there is none
  def edge(v, w)
    wanted = Set.new([v, w])
    (@store[v] || []).find{ |e| e.vertices.to_set == wanted }
  end

  # Edges involving a vertex +v+
  #
  # @return [Set, nil] nil if +v+ is not a vertex of the graph
  def edges_with(v)
    @store[v]
  end

  # Adds a vertex if it doesn't already exist.
  #
  # @return [Set] the edge set stored for +v+ (not the vertex itself)
  def add_vertex(v)
    @store[v] ||= Set.new
  end

  # Adds an edge and associated vertices if they don't already
  # exist and returns the edge.
  #
  # @return [Object] +e+ if it was added, otherwise the edge already stored
  #   for the same endpoints
  def add_edge(e)
    v, w = e.vertices.to_a
    # A loop has a single endpoint, leaving +w+ nil.
    existing = edge(v, w || v)
    return existing if existing

    e.vertices.each{ |vertex| add_vertex(vertex).add(e) }
    e
  end

end
|
42
|
+
|
43
|
+
# Undirected edge between two vertices (or a loop on a single vertex).
#
# The endpoints are kept in a Set, so equality and hashing ignore the order
# in which they were given.
class UndirectedEdge
  attr_reader :vertices

  # @raise [ArgumentError] if either vertex is nil or false
  def initialize(v, w)
    raise ArgumentError, "Vertices cannot be falsy" unless v && w
    @vertices = Set.new([v, w])
  end

  def hash
    @vertices.hash
  end

  # Compares endpoint sets; anything that does not expose #vertices (e.g. nil)
  # is simply unequal rather than raising NoMethodError.
  def ==(other)
    other.respond_to?(:vertices) && @vertices == other.vertices
  end
  alias eql? ==

  # A loop holds a single vertex, which is printed on both sides: "(v-v)".
  def to_s
    endpoints = @vertices.to_a
    "(#{endpoints.first}-#{endpoints.last})"
  end

end
|
66
|
+
|
67
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe EmailGraph::DirectedGraph do
|
4
|
+
let(:edge_pairs) {[ [1, 2],
|
5
|
+
[2, 1],
|
6
|
+
[2, 3] ]}
|
7
|
+
let(:edges) { edge_pairs.map{ |p| EmailGraph::DirectedEdge.new(p[0], p[1]) } }
|
8
|
+
let(:g) do
|
9
|
+
EmailGraph::DirectedGraph.new.tap do |g|
|
10
|
+
edges.each{ |e| g.add_edge(e) }
|
11
|
+
g.add_vertex(4)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'returns all vertices' do
|
16
|
+
expect(g.vertices).to contain_exactly(1, 2, 3, 4)
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'returns all edges' do
|
20
|
+
expect(g.edges).to contain_exactly(*edges)
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'returns a specific edge' do
|
24
|
+
expect(g.edge(1,2)).to eq(EmailGraph::DirectedEdge.new(1,2))
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'returns edges from a vertex' do
|
28
|
+
expect(g.edges_from(2).map(&:to)).to contain_exactly(1, 3)
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'returns edges to a vertex' do
|
32
|
+
expect(g.edges_to(2).map(&:from)).to contain_exactly(1)
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'returns existing edge when trying to add one with the same vertices' do
|
36
|
+
e = NewDirectedEdgeType.new(2, 3)
|
37
|
+
|
38
|
+
expect(g.add_edge(e)).to be_instance_of(EmailGraph::DirectedEdge)
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'iterates through each edge and its inverse' do
|
42
|
+
expected_yield = [ [g.edge(1, 2), g.edge(2, 1)],
|
43
|
+
[g.edge(2, 3), nil] ]
|
44
|
+
|
45
|
+
expect{ |b| g.with_each_edge_and_inverse(&b) }.to yield_successive_args(*expected_yield)
|
46
|
+
end
|
47
|
+
|
48
|
+
describe '#to_undirected' do
|
49
|
+
|
50
|
+
it 'returns with the correct edges and vertices using default edge_factory' do
|
51
|
+
undirected = g.to_undirected
|
52
|
+
|
53
|
+
expected_edges = [EmailGraph::UndirectedEdge.new(1, 2)]
|
54
|
+
expect(undirected.edges).to contain_exactly(*expected_edges)
|
55
|
+
|
56
|
+
expected_vertices = [1, 2]
|
57
|
+
expect(undirected.vertices).to contain_exactly(*expected_vertices)
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'returns with the correct edges when using a provided edge_factory' do
|
61
|
+
at_least_one_way_edge_factory = Proc.new{ |e1, e2| EmailGraph::UndirectedEdge.new(e1.from, e1.to) if e1 }
|
62
|
+
|
63
|
+
undirected = g.to_undirected(&at_least_one_way_edge_factory)
|
64
|
+
|
65
|
+
expected_edges = [ EmailGraph::UndirectedEdge.new(1, 2),
|
66
|
+
EmailGraph::UndirectedEdge.new(2, 3) ]
|
67
|
+
expect(undirected.edges).to contain_exactly(*expected_edges)
|
68
|
+
|
69
|
+
expected_vertices = [1, 2, 3]
|
70
|
+
expect(undirected.vertices).to contain_exactly(*expected_vertices)
|
71
|
+
end
|
72
|
+
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
describe EmailGraph::DirectedEdge do
|
78
|
+
|
79
|
+
it 'raises when a vertex is nil' do
|
80
|
+
expect{ EmailGraph::DirectedEdge.new(nil, 1) }.to raise_exception(ArgumentError)
|
81
|
+
end
|
82
|
+
|
83
|
+
it 'converts into a string' do
|
84
|
+
edge = EmailGraph::DirectedEdge.new("v", "w")
|
85
|
+
expect(edge.to_s).to eq("(v-w)")
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
89
|
+
|
90
|
+
class NewDirectedEdgeType < EmailGraph::DirectedEdge; end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
describe EmailGraph::InteractionGraph do
|
5
|
+
let(:a0) { OpenStruct.new(name: "a0", email: "a0@example.com") }
|
6
|
+
let(:a1) { OpenStruct.new(name: "a1", email: "a1@example.com") }
|
7
|
+
let(:msg) { OpenStruct.new(from: [a0], to: [a1], date: Time.now) }
|
8
|
+
let(:g) { EmailGraph::InteractionGraph.new }
|
9
|
+
|
10
|
+
describe '#add_message' do
|
11
|
+
|
12
|
+
it 'adds vertices (identities)' do
|
13
|
+
g.add_message(msg)
|
14
|
+
|
15
|
+
expect(g.vertices).to contain_exactly(a0.email, a1.email)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'adds edges (relationships)' do
|
19
|
+
g.add_message(msg)
|
20
|
+
|
21
|
+
expected_edge = EmailGraph::InteractionRelationship.new(a0.email, a1.email)
|
22
|
+
expect(g.edges).to contain_exactly(expected_edge)
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'adds interactions to relationships' do
|
26
|
+
g.add_message(msg)
|
27
|
+
|
28
|
+
expect(g.edge(a0.email, a1.email).interactions).to contain_exactly(msg.date)
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'adds interactions ignoring duplicates' do
|
32
|
+
msg_with_dups = OpenStruct.new( from: [a0],
|
33
|
+
to: [a1],
|
34
|
+
cc: [a1],
|
35
|
+
date: Time.now )
|
36
|
+
|
37
|
+
g.add_message(msg_with_dups)
|
38
|
+
|
39
|
+
expect(g.edge(a0.email, a1.email).interactions).to contain_exactly(msg_with_dups.date)
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'raises when message addresses have nil emails' do
|
43
|
+
nil_email = OpenStruct.new(name: "Nil Email", email: nil)
|
44
|
+
|
45
|
+
msg_with_nil_email = OpenStruct.new( from: [a0],
|
46
|
+
to: [nil_email],
|
47
|
+
date: Time.now )
|
48
|
+
|
49
|
+
expect{g.add_message(msg_with_nil_email)}.to raise_exception(ArgumentError)
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'normalizes emails if no processor is provided' do
|
53
|
+
a_norm = OpenStruct.new(name: "Norm", email: "test@gmail.com")
|
54
|
+
a_not_norm = OpenStruct.new(name: "Not Norm", email: "tEs.t+blah@gmail.com")
|
55
|
+
|
56
|
+
msg = OpenStruct.new(from: [a_norm], to: [a_not_norm], date: Time.now)
|
57
|
+
g.add_message(msg)
|
58
|
+
|
59
|
+
expected_edge = EmailGraph::InteractionRelationship.new('test@gmail.com', 'test@gmail.com')
|
60
|
+
expect(g.edges).to contain_exactly(expected_edge)
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'processes emails if a processor is provided' do
|
64
|
+
processor = Proc.new{ |e| "EMAIL@EMAIL.COM" }
|
65
|
+
g.add_message(msg, email_processor: processor)
|
66
|
+
|
67
|
+
expect(g.vertices).to contain_exactly("EMAIL@EMAIL.COM")
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
|
72
|
+
describe '#to_mutual_graph' do
|
73
|
+
|
74
|
+
it 'returns with the correct edges and vertices using default filter' do
|
75
|
+
g.add_message(msg)
|
76
|
+
g.add_message(OpenStruct.new(from: [a1], to: [a0], date: Time.now))
|
77
|
+
mutual_graph = g.to_mutual_graph
|
78
|
+
|
79
|
+
expected_edges = [EmailGraph::MutualRelationship.new(a0.email, a1.email)]
|
80
|
+
expect(mutual_graph.edges).to contain_exactly(*expected_edges)
|
81
|
+
|
82
|
+
expected_vertices = [a0.email, a1.email]
|
83
|
+
expect(mutual_graph.vertices).to contain_exactly(*expected_vertices)
|
84
|
+
|
85
|
+
expect(mutual_graph.edge(a0.email, a1.email).interactions.size).to eq(2)
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
89
|
+
|
90
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require_relative '../lib/email_graph.rb'
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe EmailGraph::UndirectedGraph do
|
4
|
+
let(:edge_pairs) {[ [1, 2],
|
5
|
+
[2, 3] ]}
|
6
|
+
let(:edges) { edge_pairs.map{ |p| EmailGraph::UndirectedEdge.new(p[0], p[1]) } }
|
7
|
+
let(:g) do
|
8
|
+
EmailGraph::UndirectedGraph.new.tap do |g|
|
9
|
+
edges.each{ |e| g.add_edge(e) }
|
10
|
+
g.add_vertex(4)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'returns all vertices' do
|
15
|
+
expect(g.vertices).to contain_exactly(1, 2, 3, 4)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'returns all edges' do
|
19
|
+
expect(g.edges).to contain_exactly(*edges)
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'returns a specific edge, vertex order ignored' do
|
23
|
+
expected_edge = EmailGraph::UndirectedEdge.new(1,2)
|
24
|
+
expect(g.edge(2, 1)).to eq(expected_edge)
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'returns edges involving a vertex' do
|
28
|
+
expected_edges = [EmailGraph::UndirectedEdge.new(2,3)]
|
29
|
+
expect(g.edges_with(3)).to contain_exactly(*expected_edges)
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'returns an existing edge if available when adding one' do
|
33
|
+
e = NewUndirectedEdgeType.new(2, 3)
|
34
|
+
|
35
|
+
expect(g.add_edge(e)).to be_instance_of(EmailGraph::UndirectedEdge)
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
|
40
|
+
describe EmailGraph::UndirectedEdge do
|
41
|
+
|
42
|
+
it 'raises when a vertex is nil' do
|
43
|
+
expect{ EmailGraph::UndirectedEdge.new(nil, 1) }.to raise_exception(ArgumentError)
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'converts into a string' do
|
47
|
+
edge = EmailGraph::UndirectedEdge.new("v", "w")
|
48
|
+
expect(edge.to_s).to match(/^\((v-w|w-v)\)$/)
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
class NewUndirectedEdgeType < EmailGraph::UndirectedEdge; end
|
metadata
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: email_graph
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ryan Dick
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: normailize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.0.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.0.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: gmail_xoauth
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.4.1
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.4.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.7'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.7'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 3.1.0
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 3.1.0
|
83
|
+
description:
|
84
|
+
email:
|
85
|
+
- rmdick@gmail.com
|
86
|
+
executables: []
|
87
|
+
extensions: []
|
88
|
+
extra_rdoc_files: []
|
89
|
+
files:
|
90
|
+
- ".gitignore"
|
91
|
+
- Gemfile
|
92
|
+
- LICENSE.txt
|
93
|
+
- README.md
|
94
|
+
- Rakefile
|
95
|
+
- email_graph.gemspec
|
96
|
+
- lib/email_graph.rb
|
97
|
+
- lib/email_graph/directed_graph.rb
|
98
|
+
- lib/email_graph/gmail_fetcher.rb
|
99
|
+
- lib/email_graph/interaction_graph.rb
|
100
|
+
- lib/email_graph/undirected_graph.rb
|
101
|
+
- lib/email_graph/version.rb
|
102
|
+
- spec/directed_graph_spec.rb
|
103
|
+
- spec/interaction_graph_spec.rb
|
104
|
+
- spec/spec_helper.rb
|
105
|
+
- spec/undirected_graph_spec.rb
|
106
|
+
homepage: https://github.com/rymodi/email_graph
|
107
|
+
licenses:
|
108
|
+
- MIT
|
109
|
+
metadata: {}
|
110
|
+
post_install_message:
|
111
|
+
rdoc_options: []
|
112
|
+
require_paths:
|
113
|
+
- lib
|
114
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
115
|
+
requirements:
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: '0'
|
119
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
requirements: []
|
125
|
+
rubyforge_project:
|
126
|
+
rubygems_version: 2.2.2
|
127
|
+
signing_key:
|
128
|
+
specification_version: 4
|
129
|
+
summary: Graph data from emails.
|
130
|
+
test_files:
|
131
|
+
- spec/directed_graph_spec.rb
|
132
|
+
- spec/interaction_graph_spec.rb
|
133
|
+
- spec/spec_helper.rb
|
134
|
+
- spec/undirected_graph_spec.rb
|
135
|
+
has_rdoc:
|