pst 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,9 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ .#*
6
+ \#*#
7
+ vendor/jars/*.jar
8
+ test/data/*.pst
9
+ .DS_Store
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in pst.gemspec
gemspec

# Needed to run the specs under spec/.
gem 'rspec'
@@ -0,0 +1,44 @@
1
+ # pst.rb
2
+
3
+ ## Description
4
+
5
+ JRuby gem for working with Outlook PST files. Syntactic sugar over
6
+ `java-libpst`
7
+
8
+ ## Example
9
+
10
+ pstfile = Java::ComPff::PSTFile.new("foo.pst")
11
+
12
+ # iterate over all (nested) folders
13
+ folders = pstfile.root.sub_folders.inject({}){|acc,f|
14
+ acc[f.name] = f
15
+ acc
16
+ }
17
+
18
+ folder = folders["Inbox"]
19
+ email = folder.children.first
20
+
21
+ pp email.subject # -> "Re: obama's new debt plan"
22
+ pp email.num_recipients # -> 2
23
+
24
+ # handy human ids and hash strings
25
+ pp email.human_id # -> "nates-mails:/path/to/foo.pst:/Top of Personal Folders/Inbox:Fri Aug 03 01:02:00 PDT 2011:<foo@bar.com>:Re: obama's new debt plan"
26
+ pp email.hash_string # -> "f161dd2a45952784c440bd5879684ae89b8b0523"
27
+
28
+ recipient = email.recipients.first
29
+ pp recipient.name # -> "Nate Murray"
30
+ pp recipient.email # -> "nate@natemurray.com"
31
+
32
+ ## References
33
+
34
+ The heavy lifting is done by Richard Johnson's java-libpst. Bless
35
+ him for working out the protocol details so I didn't have to.
36
+
37
+ * http://code.google.com/p/java-libpst/
38
+ * http://www.rjohnson.id.au/wordpress/2010/01/26/java-libpst-pst2gmail/
39
+
40
+ ## Author
41
+
42
+ Nate Murray <nate@natemurray.com>
43
+ http://www.xcombinator.com
44
+
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
@@ -0,0 +1,22 @@
1
+ here = File.expand_path(File.dirname(__FILE__))
2
+ $LOAD_PATH << "#{here}"
3
+
4
+ require 'rubygems'
5
+ require 'pp'
6
+ require 'java'
7
+ require 'andand'
8
+
9
+ module Pst; end
10
+
11
# Directory holding the vendored Java dependencies; it is put on the load
# path so the jars can be required by bare file name.
jars_dir = File.dirname(__FILE__) + "/../vendor/jars"
$LOAD_PATH << jars_dir

# Require every jar in that directory, in sorted order.
# The suffix is compared literally: the previous pattern /.jar$/ left the dot
# unescaped and anchored per line, so an entry such as "xjar" also matched.
Dir.entries(jars_dir).sort.each do |entry|
  require entry if entry.end_with?(".jar")
end
19
+
20
+ require "pst/extensions"
21
+ require "pst/version"
22
+ require "pst/base"
@@ -0,0 +1,210 @@
1
+ require 'digest/sha1'
2
+
3
# Ruby conveniences layered on top of java-libpst's PSTFile.
class Java::ComPff::PSTFile
  # collection: optional label, read by PSTFolder#human_id.
  # filename:   the name this file was opened with (see #initialize).
  attr_accessor :collection, :filename
  alias_method :file, :getFileHandle

  # Opens the PST and remembers the name it was opened with.
  def initialize(name)
    super(name)
    @filename = name
  end

  # Display name of the PST's message store.
  def name
    getMessageStore.getDisplayName
  end

  # Root folder, tagged with a back-reference to this file.
  def root
    getRootFolder.tap { |folder| folder.file = self }
  end
end
23
+
24
# Ruby conveniences layered on top of java-libpst's PSTFolder.
class Java::ComPff::PSTFolder
  # The PSTFile this folder belongs to; set by PSTFile#root and copied
  # from the parent folder in #parent=.
  attr_accessor :file
  # The enclosing folder, or nil when it was never assigned.
  attr_reader :parent

  alias_method :subfolder_count, :getSubFolderCount
  alias_method :email_count, :getContentCount

  # Display name of this folder.
  def name
    getDisplayName
  end

  # Enumerator over every descendant folder, depth first. Each folder is
  # given its parent (and therefore its file) before it is yielded.
  def sub_folders
    Enumerator.new do |out|
      getSubFolders.each do |child|
        child.parent = self
        out << child
        child.sub_folders.each { |descendant| out << descendant }
      end
    end
  end

  # Intended to enumerate the messages stored directly in this folder.
  #
  # This doesn't work, don't use it. It doesn't work because Enumerator
  # does some sort of non-deterministic lookahead that moves the cursor
  # out from underneath the underlying java-libpst library.
  #
  # Maybe once Enumerator is better understood this can be fixed; until
  # then the method raises and the code below the raise is never reached.
  def children
    raise "TODO"
    Enumerator.new do |out|
      email_count.times do |idx|
        moveChildCursorTo(idx)
        kid = getNextChild
        kid.folder = self
        out << kid
      end
    end
  end

  # Name the owning PST file was opened with.
  def filename
    file.filename
  end

  # Slash-separated folder names from the topmost known ancestor down to
  # this folder.
  def path
    segments = []
    node = self
    while node
      segments.unshift(node.name)
      node = node.parent
    end
    segments.join("/")
  end

  # Readable identifier: "<collection>:<filename>:<path>", where a missing
  # collection is written as "no-collection".
  def human_id
    "#{file.collection || "no-collection"}:#{filename}:#{path}"
  end

  # SHA1 hex digest of #human_id.
  def hash_string
    Digest::SHA1.hexdigest(human_id)
  end

  # Records the parent folder and inherits its file reference.
  def parent=(the_parent)
    @parent = the_parent
    self.file = the_parent.file
  end

  # Creation time, falling back to the last modification time; nil when
  # neither is available.
  def creation_time
    (getCreationTime || getLastModificationTime).andand.to_time
  end

end
100
+
101
# Ruby conveniences layered on top of java-libpst's PSTMessage.
class Java::ComPff::PSTMessage
  # The folder this message was read from; read by #human_id.
  attr_accessor :folder

  alias_method :subject, :getSubject
  alias_method :display_to, :getDisplayTo
  alias_method :num_recipients, :getNumberOfRecipients
  alias_method :num_attachments, :getNumberOfAttachments
  alias_method :sender_name, :getSenderName
  alias_method :sender_email, :getSenderEmailAddress
  alias_method :original_subject, :getOriginalSubject
  #alias_method :body, :getBody
  alias_method :html_body, :getBodyHTML

  # things to pay attention to
  # next.getDescriptorNode().descriptorIdentifier+"";
  # next.getSentRepresentingName() + " <"+ next.getSentRepresentingEmailAddress() +">";
  # next.getReceivedByName() + " <"+next.getReceivedByAddress()+">" +
  # next.displayTo();
  # next.getClientSubmitTime();

  # Readable identifier: the folder's human_id followed by the submit
  # time, the internet message id and the subject, colon separated.
  def human_id
    "#{folder.human_id}:#{getClientSubmitTime}:#{getInternetMessageId}:#{subject}"
  end

  # SHA1 hex digest of #human_id.
  def hash_string
    Digest::SHA1.hexdigest(human_id)
  end

  # One-line summary of the message for debugging output.
  def pretty_string
    fields = [
      getDescriptorNode.descriptorIdentifier,
      getSubject,
      getSentRepresentingName,
      getSentRepresentingEmailAddress,
      getReceivedByName,
      getReceivedByAddress,
      displayTo,
      getClientSubmitTime,
      hasAttachments
    ]
    "[%s] %s - %s <%s> %s <%s> %s %s a:%s" % fields
  end

  # All recipients of the message, as an Array.
  def recipients
    (0...getNumberOfRecipients).map { |position| getRecipient(position) }
  end

  # Submit time converted with to_time, or nil when there is none.
  def sent_at
    getClientSubmitTime.andand.to_time
  end

  # Body of the message, falling back to toString.
  #
  # this is because [Pff::PSTContact, Pff::PSTTask, Pff::PSTActivity, Pff::PSTRss]
  # are all PSTMessages but they throw a npe if you call getBody
  def contents
    [:getBody, :toString].each do |reader|
      begin
        return send(reader)
      rescue
        next
      end
    end
    raise "no contents found in #{self}"
  end

  # Recipient list rebuilt from the individual recipient records.
  def calculated_recipients_string
    recipients.map(&:pretty_string).join(", ")
  end

  # The library's recipient string, or the rebuilt one when the library
  # answers with its "No recipients table!" placeholder.
  def recipients_string
    raw = getRecipientsString
    raw == "No recipients table!" ? calculated_recipients_string : raw
  end
end
186
+
187
# Ruby conveniences layered on top of java-libpst's PSTRecipient.
class Java::ComPff::PSTRecipient
  alias_method :name, :getDisplayName
  alias_method :email, :getEmailAddress
  alias_method :smtp, :getSmtpAddress

  # "Display Name <address>" rendering of the recipient.
  def pretty_string
    "#{name} <#{email}>"
  end

  # Readable identifier; same text as #pretty_string.
  def human_id
    pretty_string
  end

  # SHA1 hex digest of #human_id.
  def hash_string
    Digest::SHA1.hexdigest(human_id)
  end
end
204
+
205
# Ruby conveniences layered on top of java-libpst's PSTAttachment.
class Java::ComPff::PSTAttachment
  # todo hash

  # One-line summary: content id, file name, MIME tag and size.
  def pretty_string
    details = [getContentId, getFilename, getMimeTag, getSize]
    "[%s] %s <%s, %d>" % details
  end
end
@@ -0,0 +1,9 @@
1
class Enumerator
  # Returns a new Enumerator that yields only the values for which the
  # block is truthy. Nothing is pulled from the receiver until the result
  # is iterated, so it is safe to use on very long or endless sequences.
  #
  # Raises ArgumentError when called without a block. Previously the
  # missing block was only noticed later, as a NoMethodError on nil once
  # the returned enumerator was iterated.
  def lazy_select(&block)
    raise ArgumentError, "lazy_select requires a block" unless block

    source = self
    Enumerator.new do |yielder|
      source.each do |val|
        yielder.yield(val) if block.call(val)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the pst gem.
module Pst
  # Version of the gem; pst.gemspec reads it as the gem's version.
  VERSION = '0.0.1'
end
data/pom.xml ADDED
@@ -0,0 +1,52 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project>
3
+ <modelVersion>4.0.0</modelVersion>
4
+ <groupId>com.xcombinator</groupId>
5
+ <artifactId>pst</artifactId>
6
+ <version>0.0.1</version>
7
+ <name>pst</name>
8
+ <description></description>
9
<!-- Repositories are addressed over HTTPS; Maven Central no longer serves plain HTTP. -->
<repositories>
  <repository>
    <id>clojars</id>
    <url>https://repo.clojars.org</url>
  </repository>
  <repository>
    <id>central</id>
    <url>https://repo1.maven.org/maven2</url>
  </repository>
</repositories>
19
+ <dependencies>
20
+ <dependency>
21
+ <groupId>com.pff</groupId>
22
+ <artifactId>java-libpst</artifactId>
23
+ <version>1.0.0</version>
24
+ <!--
25
+ <exclusions>
26
+ <exclusion>
27
+ <groupId>org.slf4j</groupId>
28
+ <artifactId>org.slf4j/slf4j-api</artifactId>
29
+ </exclusion>
30
+ </exclusions>
31
+ -->
32
+ </dependency>
33
+ </dependencies>
34
+ <build>
35
+ <plugins>
36
+ <plugin> <!-- just run `mvn process-sources` -->
37
+ <artifactId>maven-dependency-plugin</artifactId>
38
+ <executions>
39
+ <execution>
40
+ <phase>process-sources</phase>
41
+ <goals>
42
+ <goal>copy-dependencies</goal>
43
+ </goals>
44
+ <configuration>
45
+ <outputDirectory>${project.basedir}/vendor/jars</outputDirectory>
46
+ </configuration>
47
+ </execution>
48
+ </executions>
49
+ </plugin>
50
+ </plugins>
51
+ </build>
52
+ </project>
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "pst/version"
4
+
5
Gem::Specification.new do |s|
  s.name        = "pst"
  s.version     = Pst::VERSION
  s.authors     = ["Nate Murray"]
  s.email       = ["nate@natemurray.com"]
  s.homepage    = "http://www.xcombinator.com/"
  s.summary     = %q{Syntactic sugar over java-libpst}
  s.description = %q{Syntactic sugar over java-libpst.}

  s.rubyforge_project = "pst.rb"
  s.add_dependency("andand")

  # The vendored jars are git-ignored, so `git ls-files` does not list them
  # and they are added explicitly. They are collected with Dir.glob instead
  # of shelling out to `find ... -name *.jar`: that glob was unquoted, so the
  # shell could expand it against the current directory before find ran.
  vendored_jars = Dir.glob("vendor/jars/**/*.jar").select { |path| File.file?(path) }

  s.files         = `git ls-files`.split("\n") + vendored_jars
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
@@ -0,0 +1,139 @@
1
+ require 'spec_helper'
2
+ testdatadir = File.dirname(__FILE__) + "/../test/data"
3
+
4
+ Pff = Java::ComPff
5
+
6
# Specs for the Ruby extensions to java-libpst. They need the sample PST
# described in test/data/README to be present in test/data.
describe "PST" do

  before(:all) do
    @filename = testdatadir + "/albert_meyers_000.pst"
    @pstfile = Pff::PSTFile.new(@filename)

    # Index every (nested) folder by its display name.
    @folders = @pstfile.root.sub_folders.inject({}){|acc,f|
      acc[f.name] = f
      acc
    }

  end

  context "PSTFile" do

    it "should have a name" do
      @pstfile.name.should eql("albert_meyers_000")
    end

    it "should have a filename" do
      @pstfile.filename.should eql(@filename)
    end

    it "should have a root" do
      @pstfile.root.should_not be_nil
    end

    it "should tell root about itself" do
      @pstfile.root.file.should eql(@pstfile)
      @pstfile.root.file.name.should eql(@pstfile.name)
    end
  end

  context "PSTFolder" do

    it "should have subfolder count" do
      @folders["Top of Personal Folders"].subfolder_count.should eql(5)
    end

    it "should know the number of immediate emails" do
      #pp @folders["Deleted Items"].email_count
    end

    it "should have creation time" do
      # pp @folders["Inbox"].creation_time
    end

    it "should have sub folders" do
      @folders.should have_key("ExMerge - Meyers, Albert")
      @folders.should have_key("meyers-a")
    end

    it "should have content counts" do
      @folders["Deleted Items"].getContentCount.should eql(1130)
      @folders["Inbox"].getContentCount.should eql(22)
    end

    it "should have a path" do
      @folders["Inbox"].path.should eql("/Top of Personal Folders/Inbox")
    end

    it "should have a hash string" do
      # The id embeds the path the PST was opened with, so the expectation is
      # built from @filename rather than from one developer's checkout path.
      expected_id = "no-collection:#{@filename}:/Top of Personal Folders/Inbox"
      @folders["Inbox"].human_id.should eql(expected_id)
      @folders["Inbox"].hash_string.should eql(Digest::SHA1.hexdigest(expected_id))
    end

  end

  context "PSTMessage" do
    before(:all) do
      @folder = @folders["Deleted Items"]
      @email = @folder.children.first
    end

    it "should have basic attributes" do
      @email.subject.should eql("Re: deal 539246.1 REliant HLP dms 7634/7636")
      @email.display_to.should eql("Joy Werner")
    end

    it "should know about its folder" do
      @email.folder.should eql(@folder)
    end

    it "should have an id" do
      # Built from @filename for the same reason as the folder id above.
      expected_id = "no-collection:#{@filename}:/Top of Personal Folders/Deleted Items:Fri Apr 06 01:02:00 PDT 2001:<ML1KCRAP2G52RFDSYPSFSAQ0J30PDFMMB@zlsvr22>:Re: deal 539246.1 REliant HLP dms 7634/7636"
      @email.human_id.should eql(expected_id)
      @email.hash_string.should eql(Digest::SHA1.hexdigest(expected_id))
      # pp @email.pretty_string
    end

    it "should have the number of recipients" do
      @email.getNumberOfRecipients.should eql(1)
    end

    it "should iterate over recipients" do
      @email.recipients.count.should eql(1)
      #@email.recipients.each do |r|
      #  pp r
      #end
    end

  end

  describe "PSTRecipient" do
    before(:all) do
      @folder = @folders["Deleted Items"]
      @email = @folder.children.take(5).last

      # Index the recipients of the fifth message by display name.
      @recipients = @email.recipients.inject({}){|acc,r|
        acc[r.name] = r
        acc
      }
    end

    it "should have a name" do
      @recipients.should have_key("Volume Management")
      @recipients.should have_key("Williams III")
      @recipients.should have_key("Bill")
    end

    it "should have an email field" do
      @recipients["Williams III"].email.should eql("Williams III")
      @recipients["Bill"].email.should eql("/O=ENRON/OU=NA/CN=RECIPIENTS/CN=Bwillia5")
    end

    it "should have a hash string" do
      @recipients["Bill"].hash_string.should eql("f161dd2a45952784c440bd5879684ae89b8b0523")
    end
  end

end
@@ -0,0 +1,8 @@
1
+ $:.unshift(File.expand_path(File.dirname(__FILE__))+ "/../lib")
2
+ require 'rubygems'
3
+ require 'pst'
4
+ require 'bundler/setup'
5
+
6
+ RSpec.configure do |config|
7
+ # some (optional) config here
8
+ end
File without changes
@@ -0,0 +1,4 @@
1
+ # I haven't put the test PST into git because of the filesize. To download try here:
2
+ # contact me if the link breaks <nate@natemurray.com>
3
+
4
+ curl -0 http://www.xcombinator.com/wp-content/uploads/2011/05/albert_meyers_000.pst > test/data/albert_meyers_000.pst
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pst
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Nate Murray
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-08-27 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: andand
16
+ requirement: &2153586440 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2153586440
25
+ description: Syntactic sugar over java-libpst.
26
+ email:
27
+ - nate@natemurray.com
28
+ executables: []
29
+ extensions: []
30
+ extra_rdoc_files: []
31
+ files:
32
+ - .gitignore
33
+ - Gemfile
34
+ - README.mkd
35
+ - Rakefile
36
+ - lib/pst.rb
37
+ - lib/pst/base.rb
38
+ - lib/pst/extensions.rb
39
+ - lib/pst/version.rb
40
+ - pom.xml
41
+ - pst.gemspec
42
+ - spec/pst_spec.rb
43
+ - spec/spec_helper.rb
44
+ - test/data/.gitkeep
45
+ - test/data/README
46
+ - vendor/jars/java-libpst-1.0.0.jar
47
+ homepage: http://www.xcombinator.com/
48
+ licenses: []
49
+ post_install_message:
50
+ rdoc_options: []
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ requirements: []
66
+ rubyforge_project: pst.rb
67
+ rubygems_version: 1.8.6
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: Syntactic sugar over java-libpst
71
+ test_files:
72
+ - spec/pst_spec.rb
73
+ - spec/spec_helper.rb
74
+ - test/data/.gitkeep
75
+ - test/data/README