chronicle-email 0.2.0 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f9096dfd9082ccbbf59651fbeb3f7d8bbd6b00a246b1cff5094ce196f2a201ec
4
- data.tar.gz: f3a702a0f56ce3301345c7be20cf2fdff5722354ef4e6f34aa83c1e0fb73a928
3
+ metadata.gz: a910606f4987580ac86be55c02a6e3003b7cab21e93476dc1bf6fa4cc4512355
4
+ data.tar.gz: ef1dbfa473c66feb93c730a5bdd4e426203519a8d94da6afccf227bfa1fa084a
5
5
  SHA512:
6
- metadata.gz: f522c605c2277ac3552670aa67eafbd8ffe258717ac81bfafb64f4d22db0ec63891ce47faa85e761216ba7d4a2d2cb10138c67aaf28228a0cd3f5c53d8f3f8a5
7
- data.tar.gz: 700aa4b3162b5b566f83354232303873cda6377c0ead42d57c96d7db7fec41b620ed153a0d4af81ac29e5c1a0e253a6e74bb99ccfca25ab824db39238d36d5ed
6
+ metadata.gz: f1ddc8cabf1b9fbb319ad36406e9a99e585eaad8abd772aea378438f838e11be3e75a9c3fe2a1c4805ee6c84d9dfe256ba9cff3ac205406c899ee33a71d1122f
7
+ data.tar.gz: 00c745bf81b65f59c7e514e735bd3319548f747503907646ca619fbfb1634dd2bb5e199753141a9c651bf210326e803dbb3d4bdc4331bdaeba0ecb70425dd265
data/README.md CHANGED
@@ -9,16 +9,55 @@ Extract and work with your email using the command line with this plugin for [ch
9
9
  # Install chronicle-etl and this plugin
10
10
  $ gem install chronicle-etl
11
11
  $ chronicle-etl plugins:install email
12
+ ```
13
+
14
+ ### Extracting email from IMAP
15
+
16
+ For Gmail accounts, you can create an [app password](https://myaccount.google.com/apppasswords); your email address is your username.
17
+
18
+ ```sh
19
+ # Save username and password
20
+ $ chronicle-etl secrets:set imap username foo@gmail.com
21
+ $ chronicle-etl secrets:set imap password APPPASSWORD
22
+
23
+ # Then, retrieve your email from the last five days
24
+ $ chronicle-etl --extractor email:imap --transformer email --since 5d --loader json
25
+
26
+ # If you don't want to save your credentials as a secret, you can just pass
27
+ # them to the extractor directly
28
+ $ chronicle-etl --extractor email:imap --transformer email --since 5d --loader json \
29
+ --extractor-opts username:foo@gmail.com password:APPPASSWORD
30
+ ```
12
31
 
13
- # Process emails from an mbox file
14
- $ chronicle-etl --extractor email:mbox -i test.mbox --transformer email --fields subject
32
+ ### Processing email from an .mbox file
33
+ The MBOX format is used to archive an email mailbox. [Google Takeout](https://takeout.google.com/settings/takeout) exports emails from Gmail in this format.
34
+
35
+ ```sh
36
+ # Retrieve the subject lines of all emails in inbox.mbox
37
+ $ chronicle-etl --extractor email:mbox --input inbox.mbox --transformer email --fields subject
15
38
  ```
16
39
 
17
40
  ## Available Connectors
18
41
  ### Extractors
19
42
 
43
+ #### `imap`
44
+ Extractor for importing recent emails from an IMAP server.
45
+
46
+ ##### Settings
47
+
48
+ - `since`: Retrieve emails since this date
49
+ - `until`: Retrieve emails until this date
50
+ - `username`
51
+ - `password`
52
+ - `host`: (default: imap.gmail.com)
53
+ - `port`: (default: 993) Use 143 for unencrypted connections
54
+ - `mailbox`: (default: "[Gmail]/All Mail")
55
+ - `search_query`: When using Gmail, you can pass in a search query (`from:foo has:attachment`) to filter messages
56
+
57
+ For accessing Gmail, you can create a one-time [app password](https://myaccount.google.com/apppasswords). Your email address is your username.
58
+
20
59
  #### `mbox`
21
- Extractor for importing emails from an mbox file
60
+ Extractor for importing emails from an MBOX file
22
61
 
23
62
  ##### Settings
24
63
  - `input`: A path to an .mbox file
@@ -31,6 +70,3 @@ Transform an email (in the form of a string) into Chronicle Schema
31
70
  ##### Settings
32
71
  - `body_as_markdown`: (default: false) Whether to convert the email body into markdown
33
72
  - `remove_signature`: (default: true) Whether to attempt to strip out the email signature (using the [`email_reply_parser`](https://github.com/github/email_reply_parser) gem)
34
-
35
- ## Roadmap
36
- - Add an IMAP (and gmail) extractor #1
@@ -36,10 +36,14 @@ Gem::Specification.new do |spec|
36
36
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
37
37
  spec.require_paths = ["lib"]
38
38
 
39
- spec.add_dependency "chronicle-etl", "~> 0.4.4"
40
- spec.add_dependency "mail", "~> 2.7"
41
39
  spec.add_dependency 'email_reply_parser', '~> 0.5'
42
40
  spec.add_dependency 'reverse_markdown', '~> 2.0'
41
+ spec.add_dependency "chronicle-etl", "~> 0.5"
42
+ spec.add_dependency "mail", "~> 2.7"
43
+ # Needed for Ruby 3.1 compatibility (https://github.com/mikel/mail/pull/1439#issuecomment-1002801221)
44
+ # TODO: check back for new version of `mail`
45
+ spec.add_dependency "net-imap"
46
+ spec.add_dependency "net-smtp"
43
47
 
44
48
  spec.add_development_dependency "bundler", "~> 2.1"
45
49
  spec.add_development_dependency "rake", "~> 13.0"
@@ -85,8 +85,7 @@ module Chronicle
85
85
  end
86
86
 
87
87
  def clean_body message
88
- # FIXME: this all needs to be refactored
89
-
88
+ # FIXME: this all needs to be refactored
90
89
  if message.multipart?
91
90
  body = message.text_part&.decoded rescue Mail::UnknownEncodingType
92
91
  else
@@ -94,6 +93,9 @@ module Chronicle
94
93
  body = body_to_markdown if @config.body_as_markdown
95
94
  end
96
95
 
96
+ return if body == Mail::UnknownEncodingType
97
+ return unless body && body != ""
98
+
97
99
  body = body_without_signature(body) if @config.remove_signature
98
100
 
99
101
  # Force UTF-8 encoding
@@ -107,8 +109,6 @@ module Chronicle
107
109
  end
108
110
 
109
111
  def body_without_signature(body)
110
- return unless body && body != ""
111
-
112
112
  # FIXME: regex in EmailReplyParse gem seems to get into infinite loops
113
113
  # with certain long bodies that have binary data
114
114
  parsed_body = Timeout::timeout(5) do
@@ -0,0 +1,91 @@
1
+ require 'net/imap'
2
+
3
+ module Chronicle
4
+ module Email
5
+ class IMAPExtractor < Chronicle::ETL::Extractor
6
+ register_connector do |r|
7
+ r.provider = 'email'
8
+ r.description = 'imap server'
9
+ r.identifier = 'imap'
10
+ end
11
+
12
+ setting :host, required: true, default: 'imap.gmail.com'
13
+ setting :port, type: :numeric, required: true, default: 993
14
+ setting :mailbox, required: true, default: '[Gmail]/All Mail'
15
+ setting :username, required: true
16
+ setting :password, required: true
17
+ setting :search_query
18
+
19
+ def prepare
20
+ @connection = create_connection
21
+ @message_ids = fetch_message_ids
22
+ end
23
+
24
+ def results_count
25
+ @message_ids.count
26
+ end
27
+
28
+ def extract
29
+ @message_ids.each do |message_id|
30
+ message = fetch_message(message_id)
31
+ yield Chronicle::ETL::Extraction.new(data: { email: message.attr["BODY[]"]} )
32
+ end
33
+ end
34
+
35
+ private
36
+
37
+ def create_connection
38
+ connection = Net::IMAP.new(@config.host, @config.port, true)
39
+ connection.login(@config.username, @config.password)
40
+ connection.select(@config.mailbox)
41
+ connection
42
+ rescue Net::IMAP::NoResponseError => e
43
+ raise(Chronicle::ETL::ExtractionError, "Error connecting to IMAP server. Please check username and password")
44
+ end
45
+
46
+ def fetch_message_ids
47
+ keys = gmail_mode? ? search_keys_gmail : search_keys_default
48
+ message_ids = @connection.search(keys)
49
+ message_ids = message_ids.first(@config.limit) if @config.limit
50
+ message_ids
51
+ rescue Net::IMAP::BadResponseError => e
52
+ raise(Chronicle::ETL::ExtractionError, "Error searching IMAP server for messages")
53
+ end
54
+
55
+ def fetch_message(message_id)
56
+ response = @connection.fetch(message_id, "BODY.PEEK[]")
57
+ raise(Chronicle::ETL::ExtractionError, "Error loading message") unless response
58
+
59
+ return response[0]
60
+ end
61
+
62
+ def search_keys_gmail
63
+ # Gmail offers an extension to IMAP that lets us use gmail queries
64
+ q = ""
65
+
66
+ # First, we ignore drafts because they break a lot of assumptions we
67
+ # make when processing emails (lack of timestamps, ids, etc)
68
+ q = "-label:draft"
69
+
70
+ # We use UNIX timestamps in gmail filters which let us do more precise
71
+ # since/until compared with date-based imap filters
72
+ q += " after:#{@config.since.to_i}" if @config.since
73
+ q += " before:#{@config.until.to_i}" if @config.until
74
+ q += " #{@config.search_query}" if @config.search_query
75
+
76
+ ["X-GM-RAW", q]
77
+ end
78
+
79
+ def search_keys_default
80
+ keys = []
81
+ # TODO: test out non-gmail IMAP searching (for @config.search_query)
82
+ keys += ['SINCE', Net::IMAP.format_date(@config.since)] if @config.since
83
+ keys += ['BEFORE', Net::IMAP.format_date(@config.until)] if @config.until
84
+ end
85
+
86
+ def gmail_mode?
87
+ @config.host == 'imap.gmail.com'
88
+ end
89
+ end
90
+ end
91
+ end
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
2
  module Email
3
- VERSION = "0.2.0"
3
+ VERSION = "0.2.3"
4
4
  end
5
5
  end
@@ -1,6 +1,7 @@
1
1
  require "chronicle/email/version"
2
2
  require "chronicle/email/email_transformer"
3
3
  require "chronicle/email/mbox_extractor"
4
+ require "chronicle/email/imap_extractor"
4
5
 
5
6
  module Chronicle
6
7
  module Email
metadata CHANGED
@@ -1,45 +1,45 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-email
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-03-20 00:00:00.000000000 Z
11
+ date: 2022-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: chronicle-etl
14
+ name: email_reply_parser
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.4.4
19
+ version: '0.5'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.4.4
26
+ version: '0.5'
27
27
  - !ruby/object:Gem::Dependency
28
- name: mail
28
+ name: reverse_markdown
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.7'
33
+ version: '2.0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.7'
40
+ version: '2.0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: email_reply_parser
42
+ name: chronicle-etl
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
@@ -53,19 +53,47 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.5'
55
55
  - !ruby/object:Gem::Dependency
56
- name: reverse_markdown
56
+ name: mail
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '2.0'
61
+ version: '2.7'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '2.0'
68
+ version: '2.7'
69
+ - !ruby/object:Gem::Dependency
70
+ name: net-imap
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: net-smtp
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
69
97
  - !ruby/object:Gem::Dependency
70
98
  name: bundler
71
99
  requirement: !ruby/object:Gem::Requirement
@@ -128,6 +156,7 @@ files:
128
156
  - chronicle-email.gemspec
129
157
  - lib/chronicle/email.rb
130
158
  - lib/chronicle/email/email_transformer.rb
159
+ - lib/chronicle/email/imap_extractor.rb
131
160
  - lib/chronicle/email/mbox_extractor.rb
132
161
  - lib/chronicle/email/version.rb
133
162
  homepage: https://github.com/chronicle-app/chronicle-email