embulk-parser-sisimai 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ca1f55c7eb54c8e1beb658a51830320073f073bd
4
+ data.tar.gz: 85e0b1f14856907b7862e70eb7c6a6d8245f434b
5
+ SHA512:
6
+ metadata.gz: 6acd75c73bd36b988f35ead1be20c9059bb30834ad8a5aa74c82c4ff922affda987af8f29f8e998fa7b4d73da55066c92ddd296c1bcc959c31d99e423a3ab9b4
7
+ data.tar.gz: d86b40e1fde84d4e87202fc47fe9e3e114cdb9e2da779dd8ebba430e4d962b5fc6654eb510158af21836c6f900d7c17e8405e827ac1c303000482bbe168c00d5
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ jruby-9.0.4.0
data/CHANGES.md ADDED
@@ -0,0 +1,4 @@
1
+ 0.1.0 2016-02-18
2
+ ----------------
3
+
4
+ * First release
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,148 @@
1
+ # Sisimai parser plugin for Embulk
2
+
3
+ Embulk parser plugin for [Sisimai](https://github.com/sisimai/rb-Sisimai)
4
+ bounce mail analyzing interface (a successor to bounceHammer).
5
+
6
+ ## Overview
7
+
8
+ * **Plugin type**: parser
9
+ * **Guess supported**: no
10
+
11
+ ## Configuration
12
+
13
+ - **format**: output format (`column` or `json`, default: `column`)
14
+ - **extract_mail_address**: split the addresser and recipient mail addresses into user, host and verp parts (boolean, default: `false`)
15
+ - **include_delivered**: also include successfully delivered mail (Status: 2.X.Y) in the output (boolean, default: `false`)
16
+
17
+ The ``extract_mail_address`` parameter takes effect only when ``format`` is ``column``.
18
+ ## Example
19
+
20
+ ## column format
21
+
22
+ ```yaml
23
+ in:
24
+ type: any file input plugin type
25
+ parser:
26
+ type: sisimai
27
+ format: column
28
+ ```
29
+
30
+ Example output
31
+
32
+ ``extract_mail_address``: ``false`` (default)
33
+
34
+ ```
35
+ action ( string) : failed
36
+ addresser ( string) : user1@example.jp
37
+ alias ( string) :
38
+ deliverystatus ( string) : 5.1.2
39
+ destination ( string) : example.gov
40
+ diagnosticcode ( string) : 550 Host unknown
41
+ diagnostictype ( string) : SMTP
42
+ feedbacktype ( string) :
43
+ lhost ( string) : 192.0.2.97
44
+ listid ( string) :
45
+ messageid ( string) : AA406E7E18714AB2927DAACC24B47C4A@USER-PC97
46
+ reason ( string) : hostunknown
47
+ recipient ( string) : domain-does-not-exist@example.gov
48
+ replycode ( string) : 550
49
+ rhost ( string) : example.gov
50
+ senderdomain ( string) : example.jp
51
+ smtpagent ( string) : Sendmail
52
+ smtpcommand ( string) :
53
+ softbounce ( long) : 0
54
+ subject ( string) : MULTIBYTE CHARACTERS HAVE BEEN REMOVED
55
+ timestamp (timestamp) : 2008-09-18 08:54:04 UTC
56
+ timezoneoffset ( string) : +0900
57
+ token ( string) : d059e55e074333fe59001b1d30d27da85a1a9c1d
58
+ ```
59
+
60
+ ``extract_mail_address``: ``true``
61
+
62
+ ```
63
+ action ( string) : failed
64
+ addresser ( string) : user1@example.jp
65
+ alias ( string) :
66
+ deliverystatus ( string) : 5.1.2
67
+ destination ( string) : example.gov
68
+ diagnosticcode ( string) : 550 Host unknown
69
+ diagnostictype ( string) : SMTP
70
+ feedbacktype ( string) :
71
+ lhost ( string) : 192.0.2.97
72
+ listid ( string) :
73
+ messageid ( string) : AA406E7E18714AB2927DAACC24B47C4A@USER-PC97
74
+ reason ( string) : hostunknown
75
+ recipient ( string) : domain-does-not-exist@example.gov
76
+ replycode ( string) : 550
77
+ rhost ( string) : example.gov
78
+ senderdomain ( string) : example.jp
79
+ smtpagent ( string) : Sendmail
80
+ smtpcommand ( string) :
81
+ softbounce ( long) : 0
82
+ subject ( string) : MULTIBYTE CHARACTERS HAVE BEEN REMOVED
83
+ timestamp (timestamp) : 2008-09-18 08:54:04 UTC
84
+ timezoneoffset ( string) : +0900
85
+ token ( string) : d059e55e074333fe59001b1d30d27da85a1a9c1d
86
+ addresser_user ( string) : user1
87
+ addresser_host ( string) : example.jp
88
+ addresser_vrep ( string) :
89
+ recipient_user ( string) : domain-does-not-exist
90
+ recipient_host ( string) : example.gov
91
+ recipient_vrep ( string) :
92
+ ```
93
+
94
+
95
+ ## json format
96
+
97
+ ```yaml
98
+ in:
99
+ type: any file input plugin type
100
+ parser:
101
+ type: sisimai
102
+ format: json
103
+ ```
104
+
105
+ ```
106
+ result (json) : { "token": "d059e55e074333fe59001b1d30d27da85a1a9c1d", "lhost": "192.0.2.97", "rhost": "example.gov", "listid": "", "alias": "", "reason": "hostunknown", "subject": "MULTIBYTE CHARACTERS HAVE BEEN REMOVED", "messageid": "AA406E7E18714AB2927DAACC24B47C4A@USER-PC97", "smtpagent": "Sendmail", "smtpcommand": "", "destination": "example.gov", "diagnosticcode": "550 Host unknown", "senderdomain": "example.jp", "deliverystatus": "5.1.2", "timezoneoffset": "+0900", "feedbacktype": "", "diagnostictype": "SMTP", "action": "failed", "replycode": "550", "softbounce": 0, "addresser": "user1@example.jp", "recipient": "domain-does-not-exist@example.gov", "timestamp": 1221728044 }
107
+ ```
108
+
109
+
110
+ ```json
111
+ {
112
+ "token": "d059e55e074333fe59001b1d30d27da85a1a9c1d",
113
+ "lhost": "192.0.2.97",
114
+ "rhost": "example.gov",
115
+ "listid": "",
116
+ "alias": "",
117
+ "reason": "hostunknown",
118
+ "subject": "MULTIBYTE CHARACTERS HAVE BEEN REMOVED",
119
+ "messageid": "AA406E7E18714AB2927DAACC24B47C4A@USER-PC97",
120
+ "smtpagent": "Sendmail",
121
+ "smtpcommand": "",
122
+ "destination": "example.gov",
123
+ "diagnosticcode": "550 Host unknown",
124
+ "senderdomain": "example.jp",
125
+ "deliverystatus": "5.1.2",
126
+ "timezoneoffset": "+0900",
127
+ "feedbacktype": "",
128
+ "diagnostictype": "SMTP",
129
+ "action": "failed",
130
+ "replycode": "550",
131
+ "softbounce": 0,
132
+ "addresser": "user1@example.jp",
133
+ "recipient": "domain-does-not-exist@example.gov",
134
+ "timestamp": 1221728044
135
+ }
136
+ ```
137
+
138
+ ## Install
139
+
140
+ ```
141
+ $ embulk gem install embulk-parser-sisimai
142
+ ```
143
+
144
+ ## Build
145
+
146
+ ```
147
+ $ rake
148
+ ```
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,21 @@
1
+
2
+ Gem::Specification.new do |spec|
3
+ spec.name = "embulk-parser-sisimai"
4
+ spec.version = "0.1.0"
5
+ spec.authors = ["Hiroyuki Sato"]
6
+ spec.summary = "Sisimai Analyzer parser plugin for Embulk"
7
+ spec.description = "Parses Sisimai Analyzer files read by other file input plugins."
8
+ spec.email = ["hiroysato@gmail.com"]
9
+ spec.licenses = ["MIT"]
10
+ spec.homepage = "https://github.com/hiroyuki-sato/embulk-parser-sisimai"
11
+
12
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
13
+ spec.test_files = spec.files.grep(%r{^(test|spec)/})
14
+ spec.require_paths = ["lib"]
15
+
16
+ #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
17
+ spec.add_dependency 'sisimai', ['~> 4.16.0']
18
+ spec.add_development_dependency 'embulk', ['>= 0.8.1']
19
+ spec.add_development_dependency 'bundler', ['>= 1.10.6']
20
+ spec.add_development_dependency 'rake', ['>= 10.0']
21
+ end
@@ -0,0 +1,70 @@
1
+ Received: from localhost (localhost)
2
+ by mta-smtp-out-24.example.jp (8.14.1/8.14.1) id m8I8s45D007047;
3
+ Thu, 18 Sep 2008 17:54:04 +0900 (JST)
4
+ Date: Thu, 18 Sep 2008 17:54:04 +0900 (JST)
5
+ From: Mail Delivery Subsystem <MAILER-DAEMON>
6
+ Message-Id: <200809180854.m8I8s45D007047@mta-smtp-out-24.example.jp>
7
+ To: postmaster
8
+ MIME-Version: 1.0
9
+ Content-Type: multipart/report; report-type=delivery-status;
10
+ boundary="m8I8s45D007047.1221728044/mta-smtp-out-24.example.jp"
11
+ Subject: Postmaster notify: see transcript for details
12
+ Auto-Submitted: auto-generated (postmaster-notification)
13
+
14
+ This is a MIME-encapsulated message
15
+
16
+ --m8I8s45D007047.1221728044/mta-smtp-out-24.example.jp
17
+
18
+ The original message was received at Thu, 18 Sep 2008 17:54:04 +0900 (JST)
19
+ from [192.0.2.97]
20
+
21
+ ----- The following addresses had permanent fatal errors -----
22
+ <domain-does-not-exist@example.gov>
23
+ (reason: 550 Host unknown)
24
+
25
+ ----- Transcript of session follows -----
26
+ 550 5.1.2 <domain-does-not-exist@example.gov>... Host unknown (Name server: example.gov.: host not found)
27
+
28
+ --m8I8s45D007047.1221728044/mta-smtp-out-24.example.jp
29
+ Content-Type: message/delivery-status
30
+
31
+ Reporting-MTA: dns; mta-smtp-out-24.example.jp
32
+ Received-From-MTA: DNS; [192.0.2.97]
33
+ Arrival-Date: Thu, 18 Sep 2008 17:54:04 +0900 (JST)
34
+
35
+ Final-Recipient: RFC822; domain-does-not-exist@example.gov
36
+ Action: failed
37
+ Status: 5.1.2
38
+ Remote-MTA: DNS; example.gov
39
+ Diagnostic-Code: SMTP; 550 Host unknown
40
+ Last-Attempt-Date: Thu, 18 Sep 2008 17:54:04 +0900 (JST)
41
+
42
+ --m8I8s45D007047.1221728044/mta-smtp-out-24.example.jp
43
+ Content-Type: text/rfc822-headers
44
+
45
+ Return-Path: <domain-does-not-exist@example.jp>
46
+ Received: from USER-PC97 ([192.0.2.97])
47
+ (authenticated bits=0)
48
+ by mta-smtp-out-24.example.jp (8.14.1/8.14.1) with ESMTP id m8I8s45C006868
49
+ for <domain-does-not-exist@example.gov>;
50
+ Thu, 18 Sep 2008 17:54:04 +0900 (JST)
51
+ Message-ID: <AA406E7E18714AB2927DAACC24B47C4A@USER-PC97>
52
+ From: "User1, For Example" <user1@example.jp>
53
+ To: <domain-does-not-exist@example.gov>
54
+ Subject: =?utf-8?B?44Kt44K444OI44Op?=
55
+ Date: Thu, 18 Sep 2008 17:53:55 +0900
56
+ MIME-Version: 1.0
57
+ Content-Type: text/plain;
58
+ format=flowed;
59
+ charset="iso-2022-jp";
60
+ reply-type=original
61
+ Content-Transfer-Encoding: 7bit
62
+ X-Priority: 3
63
+ X-MSMail-Priority: Normal
64
+ X-Mailer: Microsoft Outlook Express 6.00.2900.5512
65
+ X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2900.5579
66
+
67
+ --m8I8s45D007047.1221728044/mta-smtp-out-24.example.jp--
68
+
69
+
70
+
@@ -0,0 +1,70 @@
1
+ Received: from localhost (localhost)
2
+ by smtp-out-34.example.jp (8.14.3/8.14.3) id n3RNHmqg024671;
3
+ Tue, 28 Apr 2009 08:17:48 +0900 (JST)
4
+ Date: Tue, 28 Apr 2009 08:17:48 +0900 (JST)
5
+ From: Mail Delivery Subsystem <MAILER-DAEMON>
6
+ Message-Id: <200904272317.n3RNHmqg024671@smtp-out-34.example.jp>
7
+ To: <user1@example.jp>
8
+ MIME-Version: 1.0
9
+ Content-Type: multipart/report; report-type=delivery-status;
10
+ boundary="n3RNHmqg024671.1240874268/smtp-out-34.example.jp"
11
+ Subject: Returned mail: see transcript for details
12
+ Auto-Submitted: auto-generated (failure)
13
+
14
+ This is a MIME-encapsulated message
15
+
16
+ --n3RNHmqg024671.1240874268/smtp-out-34.example.jp
17
+
18
+ The original message was received at Tue, 28 Apr 2009 08:17:47 +0900 (JST)
19
+ from [192.0.2.31]
20
+
21
+ ----- The following addresses had permanent fatal errors -----
22
+ <recipient-address-does-not-exist@docomo.ne.jp>
23
+ (reason: 550 Unknown user recipient-address-does-not-exist@docomo.ne.jp)
24
+
25
+ ----- Transcript of session follows -----
26
+ ... while talking to mfsmax.docomo.ne.jp.:
27
+ >>> RCPT To:<recipient-address-does-not-exist@docomo.ne.jp>
28
+ <<< 550 Unknown user recipient-address-does-not-exist@docomo.ne.jp
29
+ 550 5.1.1 <recipient-address-does-not-exist@docomo.ne.jp>... User unknown
30
+ >>> DATA
31
+ <<< 503 Bad sequence of commands
32
+
33
+ --n3RNHmqg024671.1240874268/smtp-out-34.example.jp
34
+ Content-Type: message/delivery-status
35
+
36
+ Reporting-MTA: dns; smtp-out-34.example.jp
37
+ Received-From-MTA: DNS; [192.0.2.31]
38
+ Arrival-Date: Tue, 28 Apr 2009 08:17:47 +0900 (JST)
39
+
40
+ Final-Recipient: RFC822; recipient-address-does-not-exist@docomo.ne.jp
41
+ Action: failed
42
+ Status: 5.1.1
43
+ Remote-MTA: DNS; mfsmax.docomo.ne.jp
44
+ Diagnostic-Code: SMTP; 550 Unknown user recipient-address-does-not-exist@docomo.ne.jp
45
+ Last-Attempt-Date: Tue, 28 Apr 2009 08:17:48 +0900 (JST)
46
+
47
+ --n3RNHmqg024671.1240874268/smtp-out-34.example.jp
48
+ Content-Type: text/rfc822-headers
49
+
50
+ Return-Path: <user2@example.jp>
51
+ Received: from [192.0.2.31] ([192.0.2.31])
52
+ (authenticated bits=0)
53
+ by smtp-out-34.example.jp (8.14.3/8.14.3) with ESMTP id n3RNHkqg008345
54
+ for <recipient-address-does-not-exist@docomo.ne.jp>; Tue, 28 Apr 2009 08:17:47 +0900 (JST)
55
+ Mime-Version: 1.0 (Apple Message framework v753.1)
56
+ Content-Transfer-Encoding: 7bit
57
+ Message-Id: <E9E9449A-4918-4B8B-8589-435230D67AC7@example.jp>
58
+ Content-Type: text/plain; charset=US-ASCII; format=flowed
59
+ To: recipient-address-does-not-exist@docomo.ne.jp
60
+ From: "User2, For Example" <user2@example.jp>
61
+ Subject: =?utf-8?B?44Kt44K444OI44Op?=
62
+ Date: Tue, 28 Apr 2009 08:17:45 +0900
63
+ X-Mailer: Apple Mail (2.753.1)
64
+ X-Virus-Scanned: ClamAV version 0.94.2, clamav-milter version 0.94.2 on 192.0.2.252
65
+ X-Virus-Status: Clean
66
+
67
+ --n3RNHmqg024671.1240874268/smtp-out-34.example.jp--
68
+
69
+
70
+
@@ -0,0 +1,122 @@
1
+ From MAILER-DAEMON Thu Apr 29 23:34:45 2015
2
+ Return-Path: <>
3
+ X-Original-To: root@neko-222-2222.vs.example.ne.jp
4
+ Delivered-To: root@neko-222-2222.vs.example.ne.jp
5
+ Received: by neko-222-2222.vs.example.ne.jp (Postfix)
6
+ id CE7E412402D0; Thu, 29 Apr 2015 23:34:45 +0900 (JST)
7
+ Date: Thu, 29 Apr 2015 23:34:45 +0900 (JST)
8
+ From: MAILER-DAEMON@neko-222-2222.vs.example.ne.jp (Mail Delivery System)
9
+ Subject: Mail Delivery Status Report
10
+ To: root@neko-222-2222.vs.example.ne.jp
11
+ Auto-Submitted: auto-replied
12
+ MIME-Version: 1.0
13
+ Content-Type: multipart/report; report-type=delivery-status;
14
+ boundary="2222CCCC0022.2222000022/neko-222-2222.vs.example.ne.jp"
15
+ Message-Id: <20151025071802.CE7E412402D0@neko-222-2222.vs.example.ne.jp>
16
+
17
+ This is a MIME-encapsulated message.
18
+
19
+ --2222CCCC0022.2222000022/neko-222-2222.vs.example.ne.jp
20
+ Content-Description: Notification
21
+ Content-Type: text/plain; charset=us-ascii
22
+
23
+ This is the mail system at host neko-222-2222.vs.example.ne.jp.
24
+
25
+ Enclosed is the mail delivery report that you requested.
26
+
27
+ The mail system
28
+
29
+ <kijitora@neko.example.jp>: delivery via
30
+ mail.neko.example.jp[192.0.2.2]:25: 250 2.1.5 Ok
31
+
32
+ --2222CCCC0022.2222000022/neko-222-2222.vs.example.ne.jp
33
+ Content-Description: Delivery report
34
+ Content-Type: message/delivery-status
35
+
36
+ Reporting-MTA: dns; neko-222-2222.vs.example.ne.jp
37
+ X-Postfix-Queue-ID: 22CC00222233
38
+ X-Postfix-Sender: rfc822; root@neko-222-2222.vs.example.ne.jp
39
+ Arrival-Date: Thu, 29 Apr 2015 23:34:45 +0900 (JST)
40
+
41
+ Final-Recipient: rfc822; kijitora@neko.example.jp
42
+ Action: deliverable
43
+ Status: 2.1.5
44
+ Remote-MTA: dns; mail.neko.example.jp
45
+ Diagnostic-Code: smtp; 250 2.1.5 Ok
46
+
47
+ --2222CCCC0022.2222000022/neko-222-2222.vs.example.ne.jp
48
+ Content-Description: Message Headers
49
+ Content-Type: text/rfc822-headers
50
+
51
+ Return-Path: <root@neko-222-2222.vs.example.ne.jp>
52
+ Received: by neko-222-2222.vs.example.ne.jp (Postfix, from userid 0)
53
+ id 22CC00222233; Thu, 29 Apr 2015 23:34:45 +0900 (JST)
54
+ From: root@neko-222-2222.vs.example.ne.jp
55
+ Subject: Nyaaan
56
+ To: kijitora@neko.example.jp
57
+ Message-Id: <20151025071802.22CC00222233@neko-222-2222.vs.example.ne.jp>
58
+ Date: Thu, 29 Apr 2015 23:34:45 +0900 (JST)
59
+
60
+ --2222CCCC0022.2222000022/neko-222-2222.vs.example.ne.jp--
61
+
62
+ From MAILER-DAEMON Thu Apr 29 23:34:45 2015
63
+ Return-Path: <>
64
+ X-Original-To: root@neko-222-2222.vs.example.ne.jp
65
+ Delivered-To: root@neko-222-2222.vs.example.ne.jp
66
+ Received: by neko-222-2222.vs.example.ne.jp (Postfix)
67
+ id 0472E12402D0; Thu, 29 Apr 2015 23:34:45 +0900 (JST)
68
+ Date: Thu, 29 Apr 2015 23:34:45 +0900 (JST)
69
+ From: MAILER-DAEMON@neko-222-2222.vs.example.ne.jp (Mail Delivery System)
70
+ Subject: Mail Delivery Status Report
71
+ To: root@neko-222-2222.vs.example.ne.jp
72
+ Auto-Submitted: auto-replied
73
+ MIME-Version: 1.0
74
+ Content-Type: multipart/report; report-type=delivery-status;
75
+ boundary="CC002222FFEE.2200222222/neko-222-2222.vs.example.ne.jp"
76
+ Message-Id: <20151025071833.0472E12402D0@neko-222-2222.vs.example.ne.jp>
77
+
78
+ This is a MIME-encapsulated message.
79
+
80
+ --CC002222FFEE.2200222222/neko-222-2222.vs.example.ne.jp
81
+ Content-Description: Notification
82
+ Content-Type: text/plain; charset=us-ascii
83
+
84
+ This is the mail system at host neko-222-2222.vs.example.ne.jp.
85
+
86
+ Enclosed is the mail delivery report that you requested.
87
+
88
+ The mail system
89
+
90
+ <info@neko.example.jp>: delivery via mail.neko.example.jp[192.0.2.2]:25: 250
91
+ 2.1.5 Ok
92
+
93
+ --CC002222FFEE.2200222222/neko-222-2222.vs.example.ne.jp
94
+ Content-Description: Delivery report
95
+ Content-Type: message/delivery-status
96
+
97
+ Reporting-MTA: dns; neko-222-2222.vs.example.ne.jp
98
+ X-Postfix-Queue-ID: CC002222FFEE
99
+ X-Postfix-Sender: rfc822; root@neko-222-2222.vs.example.ne.jp
100
+ Arrival-Date: Thu, 29 Apr 2015 23:34:45 +0900 (JST)
101
+
102
+ Final-Recipient: rfc822; info@neko.example.jp
103
+ Action: deliverable
104
+ Status: 2.1.5
105
+ Remote-MTA: dns; mail.neko.example.jp
106
+ Diagnostic-Code: smtp; 250 2.1.5 Ok
107
+
108
+ --CC002222FFEE.2200222222/neko-222-2222.vs.example.ne.jp
109
+ Content-Description: Message Headers
110
+ Content-Type: text/rfc822-headers
111
+
112
+ Return-Path: <root@neko-222-2222.vs.example.ne.jp>
113
+ Received: by neko-222-2222.vs.example.ne.jp (Postfix, from userid 0)
114
+ id CC002222FFEE; Thu, 29 Apr 2015 23:34:45 +0900 (JST)
115
+ From: root@neko-222-2222.vs.example.ne.jp
116
+ Subject: Nyaaan
117
+ To: info@neko.example.jp
118
+ Message-Id: <20151025071832.CC002222FFEE@neko-222-2222.vs.example.ne.jp>
119
+ Date: Thu, 29 Apr 2015 23:34:45 +0900 (JST)
120
+
121
+ --CC002222FFEE.2200222222/neko-222-2222.vs.example.ne.jp--
122
+
@@ -0,0 +1,9 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/Maildir
4
+ parser:
5
+ type: sisimai
6
+ format: column
7
+ # include_delivered: true
8
+ out:
9
+ type: stdout
@@ -0,0 +1,8 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example/Maildir
4
+ parser:
5
+ type: sisimai
6
+ format: json
7
+ out:
8
+ type: stdout
@@ -0,0 +1,70 @@
1
+ Received: from localhost (localhost)
2
+ by mta-smtp-out-24.example.jp (8.14.1/8.14.1) id m8I8s45D007047;
3
+ Thu, 18 Sep 2008 17:54:04 +0900 (JST)
4
+ Date: Thu, 18 Sep 2008 17:54:04 +0900 (JST)
5
+ From: Mail Delivery Subsystem <MAILER-DAEMON>
6
+ Message-Id: <200809180854.m8I8s45D007047@mta-smtp-out-24.example.jp>
7
+ To: postmaster
8
+ MIME-Version: 1.0
9
+ Content-Type: multipart/report; report-type=delivery-status;
10
+ boundary="m8I8s45D007047.1221728044/mta-smtp-out-24.example.jp"
11
+ Subject: Postmaster notify: see transcript for details
12
+ Auto-Submitted: auto-generated (postmaster-notification)
13
+
14
+ This is a MIME-encapsulated message
15
+
16
+ --m8I8s45D007047.1221728044/mta-smtp-out-24.example.jp
17
+
18
+ The original message was received at Thu, 18 Sep 2008 17:54:04 +0900 (JST)
19
+ from [192.0.2.97]
20
+
21
+ ----- The following addresses had permanent fatal errors -----
22
+ <domain-does-not-exist@example.gov>
23
+ (reason: 550 Host unknown)
24
+
25
+ ----- Transcript of session follows -----
26
+ 550 5.1.2 <domain-does-not-exist@example.gov>... Host unknown (Name server: example.gov.: host not found)
27
+
28
+ --m8I8s45D007047.1221728044/mta-smtp-out-24.example.jp
29
+ Content-Type: message/delivery-status
30
+
31
+ Reporting-MTA: dns; mta-smtp-out-24.example.jp
32
+ Received-From-MTA: DNS; [192.0.2.97]
33
+ Arrival-Date: Thu, 18 Sep 2008 17:54:04 +0900 (JST)
34
+
35
+ Final-Recipient: RFC822; domain-does-not-exist@example.gov
36
+ Action: failed
37
+ Status: 5.1.2
38
+ Remote-MTA: DNS; example.gov
39
+ Diagnostic-Code: SMTP; 550 Host unknown
40
+ Last-Attempt-Date: Thu, 18 Sep 2008 17:54:04 +0900 (JST)
41
+
42
+ --m8I8s45D007047.1221728044/mta-smtp-out-24.example.jp
43
+ Content-Type: text/rfc822-headers
44
+
45
+ Return-Path: <domain-does-not-exist@example.jp>
46
+ Received: from USER-PC97 ([192.0.2.97])
47
+ (authenticated bits=0)
48
+ by mta-smtp-out-24.example.jp (8.14.1/8.14.1) with ESMTP id m8I8s45C006868
49
+ for <domain-does-not-exist@example.gov>;
50
+ Thu, 18 Sep 2008 17:54:04 +0900 (JST)
51
+ Message-ID: <AA406E7E18714AB2927DAACC24B47C4A@USER-PC97>
52
+ From: "User1, For Example" <user1@example.jp>
53
+ To: <domain-does-not-exist@example.gov>
54
+ Subject: =?utf-8?B?44Kt44K444OI44Op?=
55
+ Date: Thu, 18 Sep 2008 17:53:55 +0900
56
+ MIME-Version: 1.0
57
+ Content-Type: text/plain;
58
+ format=flowed;
59
+ charset="iso-2022-jp";
60
+ reply-type=original
61
+ Content-Transfer-Encoding: 7bit
62
+ X-Priority: 3
63
+ X-MSMail-Priority: Normal
64
+ X-Mailer: Microsoft Outlook Express 6.00.2900.5512
65
+ X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2900.5579
66
+
67
+ --m8I8s45D007047.1221728044/mta-smtp-out-24.example.jp--
68
+
69
+
70
+
@@ -0,0 +1,70 @@
1
+ Received: from localhost (localhost)
2
+ by smtp-out-34.example.jp (8.14.3/8.14.3) id n3RNHmqg024671;
3
+ Tue, 28 Apr 2009 08:17:48 +0900 (JST)
4
+ Date: Tue, 28 Apr 2009 08:17:48 +0900 (JST)
5
+ From: Mail Delivery Subsystem <MAILER-DAEMON>
6
+ Message-Id: <200904272317.n3RNHmqg024671@smtp-out-34.example.jp>
7
+ To: <user1@example.jp>
8
+ MIME-Version: 1.0
9
+ Content-Type: multipart/report; report-type=delivery-status;
10
+ boundary="n3RNHmqg024671.1240874268/smtp-out-34.example.jp"
11
+ Subject: Returned mail: see transcript for details
12
+ Auto-Submitted: auto-generated (failure)
13
+
14
+ This is a MIME-encapsulated message
15
+
16
+ --n3RNHmqg024671.1240874268/smtp-out-34.example.jp
17
+
18
+ The original message was received at Tue, 28 Apr 2009 08:17:47 +0900 (JST)
19
+ from [192.0.2.31]
20
+
21
+ ----- The following addresses had permanent fatal errors -----
22
+ <recipient-address-does-not-exist@docomo.ne.jp>
23
+ (reason: 550 Unknown user recipient-address-does-not-exist@docomo.ne.jp)
24
+
25
+ ----- Transcript of session follows -----
26
+ ... while talking to mfsmax.docomo.ne.jp.:
27
+ >>> RCPT To:<recipient-address-does-not-exist@docomo.ne.jp>
28
+ <<< 550 Unknown user recipient-address-does-not-exist@docomo.ne.jp
29
+ 550 5.1.1 <recipient-address-does-not-exist@docomo.ne.jp>... User unknown
30
+ >>> DATA
31
+ <<< 503 Bad sequence of commands
32
+
33
+ --n3RNHmqg024671.1240874268/smtp-out-34.example.jp
34
+ Content-Type: message/delivery-status
35
+
36
+ Reporting-MTA: dns; smtp-out-34.example.jp
37
+ Received-From-MTA: DNS; [192.0.2.31]
38
+ Arrival-Date: Tue, 28 Apr 2009 08:17:47 +0900 (JST)
39
+
40
+ Final-Recipient: RFC822; recipient-address-does-not-exist@docomo.ne.jp
41
+ Action: failed
42
+ Status: 5.1.1
43
+ Remote-MTA: DNS; mfsmax.docomo.ne.jp
44
+ Diagnostic-Code: SMTP; 550 Unknown user recipient-address-does-not-exist@docomo.ne.jp
45
+ Last-Attempt-Date: Tue, 28 Apr 2009 08:17:48 +0900 (JST)
46
+
47
+ --n3RNHmqg024671.1240874268/smtp-out-34.example.jp
48
+ Content-Type: text/rfc822-headers
49
+
50
+ Return-Path: <user2@example.jp>
51
+ Received: from [192.0.2.31] ([192.0.2.31])
52
+ (authenticated bits=0)
53
+ by smtp-out-34.example.jp (8.14.3/8.14.3) with ESMTP id n3RNHkqg008345
54
+ for <recipient-address-does-not-exist@docomo.ne.jp>; Tue, 28 Apr 2009 08:17:47 +0900 (JST)
55
+ Mime-Version: 1.0 (Apple Message framework v753.1)
56
+ Content-Transfer-Encoding: 7bit
57
+ Message-Id: <E9E9449A-4918-4B8B-8589-435230D67AC7@example.jp>
58
+ Content-Type: text/plain; charset=US-ASCII; format=flowed
59
+ To: recipient-address-does-not-exist@docomo.ne.jp
60
+ From: "User2, For Example" <user2@example.jp>
61
+ Subject: =?utf-8?B?44Kt44K444OI44Op?=
62
+ Date: Tue, 28 Apr 2009 08:17:45 +0900
63
+ X-Mailer: Apple Mail (2.753.1)
64
+ X-Virus-Scanned: ClamAV version 0.94.2, clamav-milter version 0.94.2 on 192.0.2.252
65
+ X-Virus-Status: Clean
66
+
67
+ --n3RNHmqg024671.1240874268/smtp-out-34.example.jp--
68
+
69
+
70
+
@@ -0,0 +1,63 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ # TODO implement guess plugin to make this command work:
5
+ # $ embulk guess -g "sisimai" partial-config.yml
6
+ #
7
+ # Depending on the file format the plugin uses, you can use choose
8
+ # one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
9
+ # or line guess (LineGuessPlugin).
10
+
11
+ #require "embulk/parser/sisimai.rb"
12
+
13
+ #class Sisimai < GuessPlugin
14
+ # Plugin.register_guess("sisimai_analyzer", self)
15
+ #
16
+ # def guess(config, sample_buffer)
17
+ # if sample_buffer[0,2] == GZIP_HEADER
18
+ # guessed = {}
19
+ # guessed["type"] = "sisimai_analyzer"
20
+ # guessed["property1"] = "guessed-value"
21
+ # return {"parser" => guessed}
22
+ # else
23
+ # return {}
24
+ # end
25
+ # end
26
+ #end
27
+
28
+ #class Sisimai < TextGuessPlugin
29
+ # Plugin.register_guess("sisimai_analyzer", self)
30
+ #
31
+ # def guess_text(config, sample_text)
32
+ # js = JSON.parse(sample_text) rescue nil
33
+ # if js && js["mykeyword"] == "keyword"
34
+ # guessed = {}
35
+ # guessed["type"] = "sisimai_analyzer"
36
+ # guessed["property1"] = "guessed-value"
37
+ # return {"parser" => guessed}
38
+ # else
39
+ # return {}
40
+ # end
41
+ # end
42
+ #end
43
+
44
+ #class Sisimai < LineGuessPlugin
45
+ # Plugin.register_guess("sisimai_analyzer", self)
46
+ #
47
+ # def guess_lines(config, sample_lines)
48
+ # all_line_matched = sample_lines.all? do |line|
49
+ # line =~ /mypattern/
50
+ # end
51
+ # if all_line_matched
52
+ # guessed = {}
53
+ # guessed["type"] = "sisimai_analyzer"
54
+ # guessed["property1"] = "guessed-value"
55
+ # return {"parser" => guessed}
56
+ # else
57
+ # return {}
58
+ # end
59
+ # end
60
+ #end
61
+
62
+ end
63
+ end
@@ -0,0 +1,138 @@
1
+ require 'sisimai'
2
+ require 'sisimai/message'
3
+ require 'sisimai/data'
4
+
5
+ module Embulk
6
+ module Parser
7
+
8
+ class Sisimai < ParserPlugin
9
+ Plugin.register_parser("sisimai", self)
10
+
11
+ def self.transaction(config, &control)
12
+ task = {
13
+ "format" => config.param("format", :string, default: "column"),
14
+ "extract_mail_address" => config.param("extract_mail_address", :bool, default: false),
15
+ "include_delivered" => config.param("include_delivered", :bool, default: false)
16
+ }
17
+
18
+ format = task["format"]
19
+ columns = case format
20
+ when "json"
21
+ [ Column.new(0, "result", :json) ]
22
+ when "column"
23
+ c = [
24
+ Column.new(0, "action", :string),
25
+ Column.new(1, "addresser",:string),
26
+ Column.new(2, "alias", :string),
27
+ Column.new(3, "deliverystatus", :string),
28
+ Column.new(4, "destination", :string),
29
+ Column.new(5, "diagnosticcode", :string),
30
+ Column.new(6, "diagnostictype", :string),
31
+ Column.new(7, "feedbacktype", :string),
32
+ Column.new(8, "lhost", :string),
33
+ Column.new(9, "listid", :string),
34
+ Column.new(10, "messageid", :string),
35
+ Column.new(11, "reason", :string),
36
+ Column.new(12, "recipient", :string),
37
+ Column.new(13, "replycode", :string),
38
+ Column.new(14, "rhost", :string),
39
+ Column.new(15, "senderdomain", :string),
40
+ Column.new(16, "smtpagent", :string),
41
+ Column.new(17, "smtpcommand", :string),
42
+ Column.new(18, "softbounce", :long),
43
+ Column.new(19, "subject", :string),
44
+ Column.new(20, "timestamp", :timestamp),
45
+ Column.new(21, "timezoneoffset", :string),
46
+ Column.new(22, "token", :string),
47
+ ]
48
+ if task['extract_mail_address'] == true
49
+ c += [
50
+ Column.new(23, "addresser_user", :string),
51
+ Column.new(24, "addresser_host", :string),
52
+ Column.new(25, "addresser_vrep", :string),
53
+ Column.new(26, "recipient_user", :string),
54
+ Column.new(27, "recipient_host", :string),
55
+ Column.new(28, "recipient_vrep", :string),
56
+ ]
57
+ end
58
+ c
59
+ else
60
+ raise ArgumentError,"Unkown format type: #{format}"
61
+ end
62
+
63
+ yield(task, columns)
64
+ end
65
+
66
+ def init
67
+ # initialization code:
68
+ @format = task["format"]
69
+ @inc_delivered = task["include_delivered"]
70
+ @extract_mail_address = task["extract_mail_address"]
71
+ Embulk.logger.info "sisimai format: #{@format} include_delivered: #{@inc_delivered}, extract_mail_address: #{@extract_mail_address}"
72
+ end
73
+
74
+ def run(file_input)
75
+ while file = file_input.next_file
76
+ mesg = ::Sisimai::Message.new( data: file.read )
77
+ datas = ::Sisimai::Data.make( data: mesg, delivered: @inc_delivered )
78
+ if datas.nil?
79
+ Embulk.logger.info "This file does not contaion bounce mail. skip."
80
+ next
81
+ end
82
+ datas.each do |data|
83
+ case @format
84
+ when "json"
85
+ page_builder.add([ data.dump ])
86
+ when "column"
87
+ column_data = make_column_array(data)
88
+
89
+ page_builder.add(column_data)
90
+ else
91
+ raise RuntimeError,"Invalid format #{@format}"
92
+ end
93
+ end
94
+ end
95
+ page_builder.finish
96
+ end
97
+ private
98
+ def make_column_array(data)
99
+ row = [
100
+ data.action,
101
+ data.addresser.to_json,
102
+ data.alias,
103
+ data.deliverystatus,
104
+ data.destination,
105
+ data.diagnosticcode,
106
+ data.diagnostictype,
107
+ data.feedbacktype,
108
+ data.lhost,
109
+ data.listid,
110
+ data.messageid,
111
+ data.reason,
112
+ data.recipient.to_json,
113
+ data.replycode,
114
+ data.rhost,
115
+ data.senderdomain,
116
+ data.smtpagent,
117
+ data.smtpcommand,
118
+ data.softbounce,
119
+ data.subject,
120
+ data.timestamp.to_time.utc,
121
+ data.timezoneoffset,
122
+ data.token,
123
+ ]
124
+ if @extract_mail_address
125
+ row += [
126
+ data.addresser.user,
127
+ data.addresser.host,
128
+ data.addresser.verp,
129
+ data.recipient.user,
130
+ data.recipient.host,
131
+ data.recipient.verp,
132
+ ]
133
+ end
134
+ row
135
+ end
136
+ end
137
+ end
138
+ end
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-sisimai
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Hiroyuki Sato
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-02-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 4.16.0
19
+ name: sisimai
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 4.16.0
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.8.1
33
+ name: embulk
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.1
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.10.6
47
+ name: bundler
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.10.6
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '10.0'
61
+ name: rake
62
+ prerelease: false
63
+ type: :development
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ description: Parses Sisimai Analyzer files read by other file input plugins.
70
+ email:
71
+ - hiroysato@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".ruby-version"
78
+ - CHANGES.md
79
+ - Gemfile
80
+ - LICENSE.txt
81
+ - README.md
82
+ - Rakefile
83
+ - embulk-parser-sisimai.gemspec
84
+ - example/Maildir/make-test-01.eml
85
+ - example/Maildir/make-test-02.eml
86
+ - example/Maildir/rfc3464-28.eml
87
+ - example/conf_column.yml
88
+ - example/conf_json.yml
89
+ - example/mail/make-test-01.eml
90
+ - example/mail/make-test-02.eml
91
+ - lib/embulk/guess/sisimai.rb
92
+ - lib/embulk/parser/sisimai.rb
93
+ homepage: https://github.com/hiroyuki-sato/embulk-parser-sisimai
94
+ licenses:
95
+ - MIT
96
+ metadata: {}
97
+ post_install_message:
98
+ rdoc_options: []
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ requirements: []
112
+ rubyforge_project:
113
+ rubygems_version: 2.5.2
114
+ signing_key:
115
+ specification_version: 4
116
+ summary: Sisimai Analyzer parser plugin for Embulk
117
+ test_files: []