pst 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ .#*
6
+ \#*#
7
+ vendor/jars/*.jar
8
+ test/data/*.pst
9
+ .DS_Store
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in pst.gemspec
4
+ gemspec
5
+
6
+ gem 'rspec'
@@ -0,0 +1,44 @@
1
+ # pst.rb
2
+
3
+ ## Description
4
+
5
+ JRuby gem for working with Outlook PST files. Syntactic sugar over
6
+ `java-libpst`
7
+
8
+ ## Example
9
+
10
+ pstfile = Java::ComPff::PSTFile.new("foo.pst")
11
+
12
+ # iterate over all (nested) folders
13
+ folders = pstfile.root.sub_folders.inject({}){|acc,f|
14
+ acc[f.name] = f
15
+ acc
16
+ }
17
+
18
+ folder = folders["Inbox"]
19
+ email = folder.children.first
20
+
21
+ pp email.subject # -> "Re: obama's new debt plan"
22
+ pp email.num_recipients # -> 2
23
+
24
+ # handy human ids and hash strings
25
+ pp email.human_id # -> "nates-mails:/path/to/foo.pst:/Top of Personal Folders/Inbox:Fri Aug 03 01:02:00 PDT 2011:<foo@bar.com>:Re: obama's new debt plan"
26
+ pp email.hash_string # -> "f161dd2a45952784c440bd5879684ae89b8b0523"
27
+
28
+ recipient = email.recipients.first
29
+ pp recipient.name # -> "Nate Murray"
30
+ pp recipient.email # -> "nate@natemurray.com"
31
+
32
+ ## References
33
+
34
+ The heavy lifting is done by Richard Johnson's java-libpst. Bless
35
+ him for working out the protocol details so I didn't have to.
36
+
37
+ * http://code.google.com/p/java-libpst/
38
+ * http://www.rjohnson.id.au/wordpress/2010/01/26/java-libpst-pst2gmail/
39
+
40
+ ## Author
41
+
42
+ Nate Murray <nate@natemurray.com>
43
+ http://www.xcombinator.com
44
+
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
@@ -0,0 +1,22 @@
1
+ here = File.expand_path(File.dirname(__FILE__))
2
+ $LOAD_PATH << "#{here}"
3
+
4
+ require 'rubygems'
5
+ require 'pp'
6
+ require 'java'
7
+ require 'andand'
8
+
9
+ module Pst; end
10
+
11
+ jars_dir = File.dirname(__FILE__) + "/../vendor/jars"
12
+ $LOAD_PATH << jars_dir
13
+
14
+ Dir.entries(jars_dir).sort.each do |entry|
15
+ if entry =~ /.jar$/
16
+ require entry
17
+ end
18
+ end
19
+
20
+ require "pst/extensions"
21
+ require "pst/version"
22
+ require "pst/base"
@@ -0,0 +1,210 @@
1
+ require 'digest/sha1'
2
+
3
+ class Java::ComPff::PSTFile
4
+ attr_accessor :collection
5
+ attr_accessor :filename
6
+ alias_method :file, :getFileHandle
7
+
8
+ def initialize(name)
9
+ super(name)
10
+ @filename = name
11
+ end
12
+
13
+ def name
14
+ self.getMessageStore.getDisplayName
15
+ end
16
+
17
+ def root
18
+ f = self.getRootFolder
19
+ f.file = self
20
+ f
21
+ end
22
+ end
23
+
24
+ class Java::ComPff::PSTFolder
25
+ attr_accessor :file
26
+ attr_reader :parent
27
+ alias_method :subfolder_count, :getSubFolderCount
28
+ alias_method :email_count, :getContentCount
29
+
30
+ def name
31
+ self.getDisplayName
32
+ end
33
+
34
+ def sub_folders
35
+ Enumerator.new do |yielder|
36
+ self.getSubFolders.each do |f|
37
+ f.parent = self
38
+ yielder.yield f
39
+ f.sub_folders.each do |fc|
40
+ yielder.yield fc
41
+ end
42
+ end
43
+ end
44
+ end
45
+
46
+ def children
47
+ # This doesn't work; don't use it. It doesn't work because
48
+ # Enumerator does some sort of non-deterministic lookaheads
49
+ # that move the cursor out from underneath the underlying
50
+ # java-libpst library
51
+ #
52
+ # Maybe once I understand Enumerator better we can fix this.
53
+ raise "TODO"
54
+ Enumerator.new do |yielder|
55
+ max = self.email_count
56
+ idx = 0
57
+ while idx < max
58
+ self.moveChildCursorTo(idx)
59
+ kid = self.getNextChild
60
+ kid.folder = self
61
+ yielder.yield kid
62
+ idx = idx + 1
63
+ end
64
+ end
65
+ end
66
+
67
+ def filename
68
+ self.file.filename
69
+ end
70
+
71
+ def path
72
+ levels = [self.name]
73
+ f = self
74
+ while p = f.parent
75
+ levels << p.name
76
+ f = p
77
+ end
78
+ levels.reverse.join("/")
79
+ end
80
+
81
+ def human_id
82
+ "%s:%s:%s" % [self.file.collection || "no-collection", filename, path]
83
+ end
84
+
85
+ def hash_string
86
+ Digest::SHA1.hexdigest(human_id)
87
+ end
88
+
89
+ def parent=(the_parent)
90
+ @parent = the_parent
91
+ self.file = the_parent.file
92
+ end
93
+
94
+ def creation_time
95
+ t = self.getCreationTime || self.getLastModificationTime
96
+ t.andand.to_time
97
+ end
98
+
99
+ end
100
+
101
+ class Java::ComPff::PSTMessage
102
+ attr_accessor :folder
103
+
104
+ alias_method :subject, :getSubject
105
+ alias_method :display_to, :getDisplayTo
106
+ alias_method :num_recipients, :getNumberOfRecipients
107
+ alias_method :num_attachments, :getNumberOfAttachments
108
+ alias_method :sender_name, :getSenderName
109
+ alias_method :sender_email, :getSenderEmailAddress
110
+ alias_method :original_subject, :getOriginalSubject
111
+ #alias_method :body, :getBody
112
+ alias_method :html_body, :getBodyHTML
113
+
114
+ # things to pay attention to
115
+ # next.getDescriptorNode().descriptorIdentifier+"";
116
+ # next.getSentRepresentingName() + " <"+ next.getSentRepresentingEmailAddress() +">";
117
+ # next.getReceivedByName() + " <"+next.getReceivedByAddress()+">" +
118
+ # next.displayTo();
119
+ # next.getClientSubmitTime();
120
+
121
+ def human_id
122
+ "%s:%s:%s:%s" % [ folder.human_id, self.getClientSubmitTime.to_s, self.getInternetMessageId, self.subject ]
123
+ end
124
+
125
+ def hash_string
126
+ Digest::SHA1.hexdigest(human_id)
127
+ end
128
+
129
+ def pretty_string
130
+ "[%s] %s - %s <%s> %s <%s> %s %s a:%s" % [
131
+ self.getDescriptorNode.descriptorIdentifier,
132
+ self.getSubject,
133
+ self.getSentRepresentingName,
134
+ self.getSentRepresentingEmailAddress,
135
+ self.getReceivedByName,
136
+ self.getReceivedByAddress,
137
+ self.displayTo,
138
+ self.getClientSubmitTime,
139
+ self.hasAttachments]
140
+ end
141
+
142
+ def recipients
143
+ recip = []
144
+ #Enumerator.new do |yielder|
145
+ i = 0
146
+ while i < self.getNumberOfRecipients
147
+ recipient = self.getRecipient(i)
148
+ recip << recipient
149
+ i = i + 1
150
+ end
151
+ #end
152
+ recip
153
+ end
154
+
155
+ def sent_at
156
+ self.getClientSubmitTime.andand.to_time
157
+ end
158
+
159
+ def contents
160
+ # this is because [Pff::PSTContact, Pff::PSTTask, Pff::PSTActivity, Pff::PSTRss]
161
+ # are all PSTMessages but they throw a npe if you call getBody
162
+ begin
163
+ return self.getBody
164
+ rescue
165
+ end
166
+ begin
167
+ return self.toString
168
+ rescue
169
+ end
170
+ raise "no contents found in #{self}"
171
+ end
172
+
173
+ def calculated_recipients_string
174
+ self.recipients.collect{|r| r.pretty_string}.join(", ")
175
+ end
176
+
177
+ def recipients_string
178
+ orig = self.getRecipientsString
179
+ if orig == "No recipients table!"
180
+ calculated_recipients_string
181
+ else
182
+ orig
183
+ end
184
+ end
185
+ end
186
+
187
+ class Java::ComPff::PSTRecipient
188
+ alias_method :name, :getDisplayName
189
+ alias_method :email, :getEmailAddress
190
+ alias_method :smtp, :getSmtpAddress
191
+
192
+ def pretty_string
193
+ "%s <%s>" % [name, email]
194
+ end
195
+
196
+ def human_id
197
+ pretty_string
198
+ end
199
+
200
+ def hash_string
201
+ Digest::SHA1.hexdigest(human_id)
202
+ end
203
+ end
204
+
205
+ class Java::ComPff::PSTAttachment
206
+ # todo hash
207
+ def pretty_string
208
+ "[%s] %s <%s, %d>" % [self.getContentId, self.getFilename, self.getMimeTag, self.getSize]
209
+ end
210
+ end
@@ -0,0 +1,9 @@
1
+ class Enumerator
2
+ def lazy_select(&block)
3
+ Enumerator.new do |yielder|
4
+ self.each do |val|
5
+ yielder.yield(val) if block.call(val)
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,3 @@
1
+ module Pst
2
+ VERSION = "0.0.1"
3
+ end
data/pom.xml ADDED
@@ -0,0 +1,52 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project>
3
+ <modelVersion>4.0.0</modelVersion>
4
+ <groupId>com.xcombinator</groupId>
5
+ <artifactId>pst</artifactId>
6
+ <version>0.0.1</version>
7
+ <name>pst</name>
8
+ <description></description>
9
+ <repositories>
10
+ <repository>
11
+ <id>clojars</id>
12
+ <url>http://clojars.org/repo</url>
13
+ </repository>
14
+ <repository>
15
+ <id>central</id>
16
+ <url>http://repo1.maven.org/maven2</url>
17
+ </repository>
18
+ </repositories>
19
+ <dependencies>
20
+ <dependency>
21
+ <groupId>com.pff</groupId>
22
+ <artifactId>java-libpst</artifactId>
23
+ <version>1.0.0</version>
24
+ <!--
25
+ <exclusions>
26
+ <exclusion>
27
+ <groupId>org.slf4j</groupId>
28
+ <artifactId>org.slf4j/slf4j-api</artifactId>
29
+ </exclusion>
30
+ </exclusions>
31
+ -->
32
+ </dependency>
33
+ </dependencies>
34
+ <build>
35
+ <plugins>
36
+ <plugin> <!-- just run `mvn process-sources` -->
37
+ <artifactId>maven-dependency-plugin</artifactId>
38
+ <executions>
39
+ <execution>
40
+ <phase>process-sources</phase>
41
+ <goals>
42
+ <goal>copy-dependencies</goal>
43
+ </goals>
44
+ <configuration>
45
+ <outputDirectory>${project.basedir}/vendor/jars</outputDirectory>
46
+ </configuration>
47
+ </execution>
48
+ </executions>
49
+ </plugin>
50
+ </plugins>
51
+ </build>
52
+ </project>
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "pst/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "pst"
7
+ s.version = Pst::VERSION
8
+ s.authors = ["Nate Murray"]
9
+ s.email = ["nate@natemurray.com"]
10
+ s.homepage = "http://www.xcombinator.com/"
11
+ s.summary = %q{Syntactic sugar over java-libpst}
12
+ s.description = %q{Syntactic sugar over java-libpst.}
13
+
14
+ s.rubyforge_project = "pst.rb"
15
+ s.add_dependency("andand")
16
+
17
+ s.files = `git ls-files`.split("\n") + `find vendor/jars -type f -name *.jar`.split("\n")
18
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
19
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
+ s.require_paths = ["lib"]
21
+ end
@@ -0,0 +1,139 @@
1
+ require 'spec_helper'
2
+ testdatadir = File.dirname(__FILE__) + "/../test/data"
3
+
4
+ Pff = Java::ComPff
5
+
6
+ describe "PST" do
7
+
8
+ before(:all) do
9
+ @filename = testdatadir + "/albert_meyers_000.pst"
10
+ @pstfile = Pff::PSTFile.new(@filename)
11
+
12
+ @folders = @pstfile.root.sub_folders.inject({}){|acc,f|
13
+ acc[f.name] = f
14
+ acc
15
+ }
16
+
17
+ end
18
+
19
+ context "PSTFile" do
20
+
21
+ it "should have a name" do
22
+ @pstfile.name.should eql("albert_meyers_000")
23
+ end
24
+
25
+ it "should have a filename" do
26
+ @pstfile.filename.should eql(@filename)
27
+ end
28
+
29
+ it "should have a root" do
30
+ @pstfile.root.should_not be_nil
31
+ end
32
+
33
+ it "should tell root about itself" do
34
+ @pstfile.root.file.should eql(@pstfile)
35
+ @pstfile.root.file.name.should eql(@pstfile.name)
36
+ end
37
+ end
38
+
39
+ context "PSTFolder" do
40
+
41
+ before(:all) do
42
+
43
+ end
44
+
45
+ it "should have subfolder count" do
46
+ @folders["Top of Personal Folders"].subfolder_count.should eql(5)
47
+ end
48
+
49
+ it "should know the number of immediate emails" do
50
+ #pp @folders["Deleted Items"].email_count
51
+ end
52
+
53
+ it "should have creation time" do
54
+ # pp @folders["Inbox"].creation_time
55
+ end
56
+
57
+ it "should have sub folders" do
58
+ @folders.should have_key("ExMerge - Meyers, Albert")
59
+ @folders.should have_key("meyers-a")
60
+ end
61
+
62
+ it "should have content counts" do
63
+ @folders["Deleted Items"].getContentCount.should eql(1130)
64
+ @folders["Inbox"].getContentCount.should eql(22)
65
+ end
66
+
67
+ it "should have a path" do
68
+ @folders["Inbox"].path.should eql("/Top of Personal Folders/Inbox")
69
+ end
70
+
71
+ it "should have a hash string" do
72
+ @folders["Inbox"].human_id.should eql("no-collection:/Users/nmurray/projects/enron/software/pst.rb/spec/../test/data/albert_meyers_000.pst:/Top of Personal Folders/Inbox")
73
+ @folders["Inbox"].hash_string.should eql("767d47f8134cd5c14786efd0274586b1065278e7")
74
+ end
75
+
76
+ end
77
+
78
+ context "PSTMessage" do
79
+ before(:all) do
80
+ @folder = @folders["Deleted Items"]
81
+ @email = @folder.children.first
82
+ end
83
+
84
+ it "should have basic attributes" do
85
+ @email.subject.should eql("Re: deal 539246.1 REliant HLP dms 7634/7636")
86
+ @email.display_to.should eql("Joy Werner")
87
+ end
88
+
89
+ it "should know about its folder" do
90
+ @email.folder.should eql(@folder)
91
+ end
92
+
93
+ it "should have an id" do
94
+ @email.human_id.should eql("no-collection:/Users/nmurray/projects/enron/software/pst.rb/spec/../test/data/albert_meyers_000.pst:/Top of Personal Folders/Deleted Items:Fri Apr 06 01:02:00 PDT 2001:<ML1KCRAP2G52RFDSYPSFSAQ0J30PDFMMB@zlsvr22>:Re: deal 539246.1 REliant HLP dms 7634/7636")
95
+ @email.hash_string.should eql("c512b175785b28532146be7cdb165a5bbee4d130")
96
+ # pp @email.pretty_string
97
+ end
98
+
99
+ it "should have the number of recipients" do
100
+ @email.getNumberOfRecipients.should eql(1)
101
+ end
102
+
103
+ it "should iterate over recipients" do
104
+ @email.recipients.count.should eql(1)
105
+ #@email.recipients.each do |r|
106
+ # pp r
107
+ #end
108
+ end
109
+
110
+ end
111
+
112
+ describe "PSTRecipient" do
113
+ before(:all) do
114
+ @folder = @folders["Deleted Items"]
115
+ @email = @folder.children.take(5).last
116
+
117
+ @recipients = @email.recipients.inject({}){|acc,r|
118
+ acc[r.name] = r
119
+ acc
120
+ }
121
+ end
122
+
123
+ it "should have a name" do
124
+ @recipients.should have_key("Volume Management")
125
+ @recipients.should have_key("Williams III")
126
+ @recipients.should have_key("Bill")
127
+ end
128
+
129
+ it "should have an email field" do
130
+ @recipients["Williams III"].email.should eql("Williams III")
131
+ @recipients["Bill"].email.should eql("/O=ENRON/OU=NA/CN=RECIPIENTS/CN=Bwillia5")
132
+ end
133
+
134
+ it "should have a hash string" do
135
+ @recipients["Bill"].hash_string.should eql("f161dd2a45952784c440bd5879684ae89b8b0523")
136
+ end
137
+ end
138
+
139
+ end
@@ -0,0 +1,8 @@
1
+ $:.unshift(File.expand_path(File.dirname(__FILE__))+ "/../lib")
2
+ require 'rubygems'
3
+ require 'pst'
4
+ require 'bundler/setup'
5
+
6
+ RSpec.configure do |config|
7
+ # some (optional) config here
8
+ end
File without changes
@@ -0,0 +1,4 @@
1
+ # I haven't put the test PST into git because of the file size. To download it, run the curl command below;
2
+ # contact me if the link breaks <nate@natemurray.com>
3
+
4
+ curl -0 http://www.xcombinator.com/wp-content/uploads/2011/05/albert_meyers_000.pst > test/data/albert_meyers_000.pst
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pst
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Nate Murray
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-08-27 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: andand
16
+ requirement: &2153586440 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2153586440
25
+ description: Syntactic sugar over java-libpst.
26
+ email:
27
+ - nate@natemurray.com
28
+ executables: []
29
+ extensions: []
30
+ extra_rdoc_files: []
31
+ files:
32
+ - .gitignore
33
+ - Gemfile
34
+ - README.mkd
35
+ - Rakefile
36
+ - lib/pst.rb
37
+ - lib/pst/base.rb
38
+ - lib/pst/extensions.rb
39
+ - lib/pst/version.rb
40
+ - pom.xml
41
+ - pst.gemspec
42
+ - spec/pst_spec.rb
43
+ - spec/spec_helper.rb
44
+ - test/data/.gitkeep
45
+ - test/data/README
46
+ - vendor/jars/java-libpst-1.0.0.jar
47
+ homepage: http://www.xcombinator.com/
48
+ licenses: []
49
+ post_install_message:
50
+ rdoc_options: []
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ requirements: []
66
+ rubyforge_project: pst.rb
67
+ rubygems_version: 1.8.6
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: Syntactic sugar over java-libpst
71
+ test_files:
72
+ - spec/pst_spec.rb
73
+ - spec/spec_helper.rb
74
+ - test/data/.gitkeep
75
+ - test/data/README