substack_parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7caeccaf6a01d7ec1b6856382bdb5ea3ee6b50866fc816d48f166b76037b3fd9
4
+ data.tar.gz: c595ce2474e5b84c5c8b7006d4619eb70f59a5e7485bd37dc1b12c15c68430cf
5
+ SHA512:
6
+ metadata.gz: 38da37ef304349fb1bdb68db7c1a83f404d25aec10aa3a86f1861d1cb590cd60e5b14811e3610d0c2ca6f6d397d6a025fed9387eb4025ce14712ac0971efd8cf
7
+ data.tar.gz: 6b9fbc7c8748bd34354a41268eb181df853c4849b848ca8020c3d81ad340ffb1f818cf2daed54e64f58c98f450c25f9d9b6b55685079caddc7b250b953e2e238
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ substack_parser
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.1.3
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.1
6
+ before_install: gem install bundler -v 2.1.4
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in substack_parser.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "rspec", "~> 3.0"
8
+ gem 'rubyzip', '~> 1.2.1'
9
+ gem "csv", "~> 3.2"
data/Gemfile.lock ADDED
@@ -0,0 +1,42 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ substack_parser (0.1.0)
5
+ nokogiri
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ csv (3.2.7)
11
+ diff-lcs (1.5.0)
12
+ nokogiri (1.15.4-x86_64-linux)
13
+ racc (~> 1.4)
14
+ racc (1.7.1)
15
+ rake (12.3.3)
16
+ rspec (3.12.0)
17
+ rspec-core (~> 3.12.0)
18
+ rspec-expectations (~> 3.12.0)
19
+ rspec-mocks (~> 3.12.0)
20
+ rspec-core (3.12.2)
21
+ rspec-support (~> 3.12.0)
22
+ rspec-expectations (3.12.3)
23
+ diff-lcs (>= 1.2.0, < 2.0)
24
+ rspec-support (~> 3.12.0)
25
+ rspec-mocks (3.12.6)
26
+ diff-lcs (>= 1.2.0, < 2.0)
27
+ rspec-support (~> 3.12.0)
28
+ rspec-support (3.12.1)
29
+ rubyzip (1.2.4)
30
+
31
+ PLATFORMS
32
+ x86_64-linux
33
+
34
+ DEPENDENCIES
35
+ csv (~> 3.2)
36
+ rake (~> 12.0)
37
+ rspec (~> 3.0)
38
+ rubyzip (~> 1.2.1)
39
+ substack_parser!
40
+
41
+ BUNDLED WITH
42
+ 2.3.26
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2023 Mathew Thomas
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,124 @@
1
+ # SubstackParser
2
+ This gem is to help parse [Substack](https://substack.com/) exports.
3
+
4
+ ## Installation
5
+
6
+ Add this line to your application's Gemfile:
7
+
8
+ ```ruby
9
+ gem 'substack_parser'
10
+ ```
11
+
12
+ And then execute:
13
+
14
+ $ bundle install
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install substack_parser
19
+
20
+ ## Usage
21
+ To parse the export:
22
+ ```ruby
23
+ parser = SubstackParser.new("path/to/substack_export.zip") # the path must point to an existing .zip file
24
+ ```
25
+
26
+ Substack provides data for mailing lists, posts, post visits, and posts mailed.
27
+
28
+ ### Mailing List
29
+ ```ruby
30
+ parser.mailing_list # Mailing list subscribers (one hash per subscriber)
31
+
32
+ [
33
+ {
34
+ "email"=>"dummy_email1@example.com",
35
+ "active_subscription"=>"FALSE",
36
+ "expiry"=>nil,
37
+ "plan"=>"other",
38
+ "email_disabled"=>"FALSE",
39
+ "digest_enabled"=>"FALSE",
40
+ "created_at"=>"2022-11-15T07:55:22.660Z"
41
+ }, .....
42
+ ]
43
+
44
+
45
+
46
+ ```
47
+ ### Post List
48
+ ```ruby
49
+ parser.post_list # List of posts
50
+
51
+ [
52
+ {
53
+ "post_id"=>1,
54
+ "post_date"=>"2023-06-22T04:58:05.613Z",
55
+ "is_published"=>"TRUE",
56
+ "email_sent_at"=>"2023-06-22T04:58:05.675Z",
57
+ "inbox_sent_at"=>"2023-06-22T04:58:05.675Z",
58
+ "type"=>"newsletter",
59
+ "audience"=>"only_paid",
60
+ "title"=>"One",
61
+ "subtitle"=>"Lorem ipsum dolor",
62
+ "podcast_url"=>nil,
63
+ "content"=>"<p>Lorem ipsum dolor ........</p>"
64
+ }, ....
65
+ ]
66
+
67
+ ```
68
+ ### Read List
69
+
70
+ ```ruby
71
+ parser.read_list # To get the read list for all posts
72
+
73
+ [
74
+ {
75
+ "post_id"=>"1",
76
+ "timestamp"=>"2023-01-05T10:36:08.164Z",
77
+ "email"=>"dummy_email1@gmail.com",
78
+ "post_type"=>"newsletter",
79
+ "post_audience"=>"founding",
80
+ "active_subscription"=>"TRUE",
81
+ "country"=>nil,
82
+ "city"=>nil,
83
+ "region"=>nil,
84
+ "device_type"=>nil,
85
+ "client_os"=>nil,
86
+ "client_type"=>nil,
87
+ "user_agent"=>"Mozilla/5.0"
88
+ }
89
+ ]
90
+
91
+ parser.get_read_list_for_post(post_id) # To get the read list for a single post
92
+ ```
93
+
94
+ ### Post Email List
95
+ ```ruby
96
+
97
+ parser.emails_sent_list #To get all the post emails that were sent
98
+
99
+ [
100
+ {
101
+ "post_id"=>"1",
102
+ "timestamp"=>"2023-01-05T10:35:15.735Z",
103
+ "email"=>"dummy_email1@gmail.com",
104
+ "post_type"=>"newsletter",
105
+ "post_audience"=>"founding",
106
+ "active_subscription"=>"FALSE"
107
+ } ......
108
+ ]
109
+
110
+
111
+ parser.get_emails_sent_list_for_post(post_id) #To get emails sent for a post
112
+
113
+ ```
114
+
115
+
116
+
117
+ ## Contributing
118
+
119
+ Bug reports and pull requests are welcome on GitHub at https://github.com/betacraft/substack_parser.
120
+
121
+
122
+ ## License
123
+
124
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "substack_parser"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,86 @@
1
+ require 'zip'
2
+ require 'csv'
3
+
4
# Reads a Substack export archive (a .zip file) and exposes the CSV and HTML
# entries it contains as arrays of plain hashes keyed by CSV header name.
#
# All list-returning methods return an Array; a missing archive entry yields
# an empty Array rather than an error (except #mailing_list, which raises).
class SubstackParser
  # @return [String] the archive path given to the constructor
  attr_reader :file_path

  # @param file_path [String] path to the export archive; must end in ".zip"
  #   and exist on disk
  # @raise [RuntimeError] "File is not a zip" or "File not found"
  def initialize(file_path)
    @file_path = file_path
    validate_zip
  end

  # Rows of the first archive entry whose name starts with "email_list".
  #
  # @return [Array<Hash>] one hash per CSV row
  # @raise [RuntimeError] when the archive has no such entry
  def mailing_list
    entry = email_list_file
    raise "email_list could not be found" if entry.nil?

    # Read the located entry directly instead of globbing for its name again.
    rows_from_csv(entry.get_input_stream.read)
  end

  # Rows of "posts.csv", each with "post_id" converted to an Integer and a
  # "content" key holding the text of "posts/<post_id>.html" (nil if absent).
  #
  # @return [Array<Hash>]
  def post_list
    parse_csv('posts.csv').map do |post|
      post_id = post["post_id"]
      # The raw (string) id names the HTML entry; the Integer goes in the row.
      post.merge("post_id" => post_id.to_i).merge(post_content(post_id))
    end
  end

  # Rows of "posts/<post_id>.opens.csv".
  #
  # @param post_id [#to_s] identifier interpolated into the entry name
  # @return [Array<Hash>] empty when the entry does not exist
  def get_read_list_for_post(post_id)
    parse_csv("posts/#{post_id}.opens.csv")
  end

  # Rows of "posts/<post_id>.delivers.csv".
  #
  # @param post_id [#to_s] identifier interpolated into the entry name
  # @return [Array<Hash>] empty when the entry does not exist
  def get_emails_sent_list_for_post(post_id)
    parse_csv("posts/#{post_id}.delivers.csv")
  end

  # Rows of every entry matching "posts/*.opens.csv", concatenated.
  #
  # @return [Array<Hash>]
  def read_list
    grouped_post_details('posts/*.opens.csv')
  end

  # Rows of every entry matching "posts/*.delivers.csv", concatenated.
  #
  # @return [Array<Hash>]
  def emails_sent_list
    grouped_post_details('posts/*.delivers.csv')
  end

  private

  # Memoized handle on the archive, opened on first use.
  def unzip_file
    @unzip_file ||= Zip::File.open(file_path)
  end

  # First archive entry whose name starts with "email_list", or nil.
  def email_list_file
    unzip_file.find { |entry| entry.name.start_with?("email_list") }
  end

  # Parses the CSV entry matching +filename+.
  #
  # @return [Array<Hash>] empty when no entry matches, so callers always get
  #   the same type back
  def parse_csv(filename)
    content = get_zipped_file_content(filename)
    return [] if content.nil?

    rows_from_csv(content)
  end

  # Wraps the HTML body of a post so it can be merged into its CSV row.
  def post_content(post_id)
    { "content" => get_zipped_file_content("posts/#{post_id}.html") }
  end

  # Text of the first entry matching +filename+, or nil when nothing matches.
  def get_zipped_file_content(filename)
    entry = unzip_file.glob(filename).first
    return nil if entry.nil?

    entry.get_input_stream.read
  end

  # Parses every entry matching the glob +path+ and concatenates the rows.
  def grouped_post_details(path)
    unzip_file.glob(path).flat_map do |entry|
      rows_from_csv(entry.get_input_stream.read)
    end
  end

  # Converts CSV text with a header line into an array of row hashes.
  def rows_from_csv(content)
    CSV.parse(content, headers: true).map(&:to_h)
  end

  # Checks the path by name and existence only; the archive is not opened here.
  def validate_zip
    # to_s makes a nil path fail with the descriptive error, not NoMethodError.
    raise "File is not a zip" unless file_path.to_s.end_with?('.zip')
    raise "File not found" unless File.exist?(file_path)
  end
end
@@ -0,0 +1,24 @@
1
+
2
# Gem specification for substack_parser.
#
# Packaged files are whatever `git ls-files` reports from the directory that
# contains this gemspec, minus anything under test/, spec/ or features/.
Gem::Specification.new do |gem|
  gem.name    = "substack_parser"
  gem.version = "0.0.1"
  gem.authors = ["Mathew Thomas"]
  gem.email   = ["mathew@betacraft.com"]

  gem.summary  = "This gem is to help parse Substack exports."
  gem.homepage = "https://github.com/betacraft/substack_parser"
  gem.license  = "MIT"
  gem.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  gem.metadata["homepage_uri"]    = "https://github.com/betacraft/substack_parser"
  gem.metadata["source_code_uri"] = "https://github.com/betacraft/substack_parser"

  # Run git from the gemspec's own directory so the listing does not depend
  # on the caller's current working directory.
  gemspec_dir = File.expand_path('..', __FILE__)
  gem.files = Dir.chdir(gemspec_dir) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }
  end

  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Runtime dependencies.
  gem.add_dependency "csv", "~> 3.2"
  gem.add_dependency "rubyzip", "~> 1.2.1"
end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: substack_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Mathew Thomas
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2023-08-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csv
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rubyzip
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 1.2.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 1.2.1
41
+ description:
42
+ email:
43
+ - mathew@betacraft.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - ".rspec"
50
+ - ".ruby-gemset"
51
+ - ".ruby-version"
52
+ - ".travis.yml"
53
+ - Gemfile
54
+ - Gemfile.lock
55
+ - LICENSE.txt
56
+ - README.md
57
+ - Rakefile
58
+ - bin/console
59
+ - bin/setup
60
+ - lib/substack_parser.rb
61
+ - substack_parser.gemspec
62
+ homepage: https://github.com/betacraft/substack_parser
63
+ licenses:
64
+ - MIT
65
+ metadata:
66
+ homepage_uri: https://github.com/betacraft/substack_parser
67
+ source_code_uri: https://github.com/betacraft/substack_parser
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: 2.3.0
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubygems_version: 3.3.26
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: This gem is to help parse Substack exports.
87
+ test_files: []