substack_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7caeccaf6a01d7ec1b6856382bdb5ea3ee6b50866fc816d48f166b76037b3fd9
4
+ data.tar.gz: c595ce2474e5b84c5c8b7006d4619eb70f59a5e7485bd37dc1b12c15c68430cf
5
+ SHA512:
6
+ metadata.gz: 38da37ef304349fb1bdb68db7c1a83f404d25aec10aa3a86f1861d1cb590cd60e5b14811e3610d0c2ca6f6d397d6a025fed9387eb4025ce14712ac0971efd8cf
7
+ data.tar.gz: 6b9fbc7c8748bd34354a41268eb181df853c4849b848ca8020c3d81ad340ffb1f818cf2daed54e64f58c98f450c25f9d9b6b55685079caddc7b250b953e2e238
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ substack_parser
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.1.3
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.1
6
+ before_install: gem install bundler -v 2.1.4
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in substack_parser.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "rspec", "~> 3.0"
8
+ gem 'rubyzip', '~> 1.2.1'
9
+ gem "csv", "~> 3.2"
data/Gemfile.lock ADDED
@@ -0,0 +1,42 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ substack_parser (0.1.0)
5
+ nokogiri
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ csv (3.2.7)
11
+ diff-lcs (1.5.0)
12
+ nokogiri (1.15.4-x86_64-linux)
13
+ racc (~> 1.4)
14
+ racc (1.7.1)
15
+ rake (12.3.3)
16
+ rspec (3.12.0)
17
+ rspec-core (~> 3.12.0)
18
+ rspec-expectations (~> 3.12.0)
19
+ rspec-mocks (~> 3.12.0)
20
+ rspec-core (3.12.2)
21
+ rspec-support (~> 3.12.0)
22
+ rspec-expectations (3.12.3)
23
+ diff-lcs (>= 1.2.0, < 2.0)
24
+ rspec-support (~> 3.12.0)
25
+ rspec-mocks (3.12.6)
26
+ diff-lcs (>= 1.2.0, < 2.0)
27
+ rspec-support (~> 3.12.0)
28
+ rspec-support (3.12.1)
29
+ rubyzip (1.2.4)
30
+
31
+ PLATFORMS
32
+ x86_64-linux
33
+
34
+ DEPENDENCIES
35
+ csv (~> 3.2)
36
+ rake (~> 12.0)
37
+ rspec (~> 3.0)
38
+ rubyzip (~> 1.2.1)
39
+ substack_parser!
40
+
41
+ BUNDLED WITH
42
+ 2.3.26
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2023 Mathew Thomas
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,124 @@
1
+ # SubstackParser
2
+ This gem is to help parse [Substack](https://substack.com/) exports.
3
+
4
+ ## Installation
5
+
6
+ Add this line to your application's Gemfile:
7
+
8
+ ```ruby
9
+ gem 'substack_parser'
10
+ ```
11
+
12
+ And then execute:
13
+
14
+ $ bundle install
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install substack_parser
19
+
20
+ ## Usage
21
+ To parse the export:
22
+ ```ruby
23
+ parser = SubstackParser.new("path/to/substack_export.zip") # must be an existing .zip file
24
+ ```
25
+
26
+ Substack provides data for mailing lists, posts, post visits, and posts mailed.
27
+
28
+ ### Mailing List
29
+ ```ruby
30
+ parser.mailing_list # Mailing list entries (one hash per subscriber)
31
+
32
+ [
33
+ {
34
+ "email"=>"dummy_email1@example.com",
35
+ "active_subscription"=>"FALSE",
36
+ "expiry"=>nil,
37
+ "plan"=>"other",
38
+ "email_disabled"=>"FALSE",
39
+ "digest_enabled"=>"FALSE",
40
+ "created_at"=>"2022-11-15T07:55:22.660Z"
41
+ }, .....
42
+ ]
43
+
44
+
45
+
46
+ ```
47
+ ### Post List
48
+ ```ruby
49
+ parser.post_list # List of posts
50
+
51
+ [
52
+ {
53
+ "post_id"=>1,
54
+ "post_date"=>"2023-06-22T04:58:05.613Z",
55
+ "is_published"=>"TRUE",
56
+ "email_sent_at"=>"2023-06-22T04:58:05.675Z",
57
+ "inbox_sent_at"=>"2023-06-22T04:58:05.675Z",
58
+ "type"=>"newsletter",
59
+ "audience"=>"only_paid",
60
+ "title"=>"One",
61
+ "subtitle"=>"Lorem ipsum dolor",
62
+ "podcast_url"=>nil,
63
+ "content"=>"<p>Lorem ipsum dolor ........</p>"
64
+ }, ....
65
+ ]
66
+
67
+ ```
68
+ ### Read List
69
+
70
+ ```ruby
71
+ parser.read_list # To get the read list for all posts
72
+
73
+ [
74
+ {
75
+ "post_id"=>"1",
76
+ "timestamp"=>"2023-01-05T10:36:08.164Z",
77
+ "email"=>"dummy_email1@gmail.com",
78
+ "post_type"=>"newsletter",
79
+ "post_audience"=>"founding",
80
+ "active_subscription"=>"TRUE",
81
+ "country"=>nil,
82
+ "city"=>nil,
83
+ "region"=>nil,
84
+ "device_type"=>nil,
85
+ "client_os"=>nil,
86
+ "client_type"=>nil,
87
+ "user_agent"=>"Mozilla/5.0"
88
+ }
89
+ ]
90
+
91
+ parser.get_read_list_for_post(post_id) # To get the read list for a single post
92
+ ```
93
+
94
+ ### Post Email List
95
+ ```ruby
96
+
97
+ parser.emails_sent_list #To get all the post emails that were sent
98
+
99
+ [
100
+ {
101
+ "post_id"=>"1",
102
+ "timestamp"=>"2023-01-05T10:35:15.735Z",
103
+ "email"=>"dummy_email1@gmail.com",
104
+ "post_type"=>"newsletter",
105
+ "post_audience"=>"founding",
106
+ "active_subscription"=>"FALSE"
107
+ } ......
108
+ ]
109
+
110
+
111
+ parser.get_emails_sent_list_for_post(post_id) #To get emails sent for a post
112
+
113
+ ```
114
+
115
+
116
+
117
+ ## Contributing
118
+
119
+ Bug reports and pull requests are welcome on GitHub at https://github.com/betacraft/substack_parser.
120
+
121
+
122
+ ## License
123
+
124
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "substack_parser"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,86 @@
1
+ require 'zip'
2
+ require 'csv'
3
+
4
# Reads a Substack export archive (a .zip file) and exposes the CSV and HTML
# files inside it as plain Ruby arrays of hashes.
#
# Files read from the archive:
#   * the first entry whose name starts with "email_list" -> #mailing_list
#   * posts.csv plus posts/<post_id>.html                  -> #post_list
#   * posts/<post_id>.opens.csv                            -> read lists
#   * posts/<post_id>.delivers.csv                         -> emails-sent lists
class SubstackParser
  attr_reader :file_path

  # @param file_path [String] path to the export archive
  # @raise [RuntimeError] "File is not a zip" when the path does not end in
  #   ".zip", or "File not found" when nothing exists at that path
  def initialize(file_path)
    @file_path = file_path
    validate_zip
  end

  # @return [Array<Hash>] one hash per row of the email list CSV
  # @raise [RuntimeError] when the archive has no "email_list*" entry
  def mailing_list
    entry = email_list_file
    raise "email_list could not be found" if entry.nil?
    # Read the entry that was already located instead of globbing for it
    # again by name (a name containing glob metacharacters would not match).
    rows_from(read_entry(entry))
  end

  # @return [Array<Hash>] one hash per row of posts.csv, with "post_id"
  #   converted to an Integer and a "content" key holding the text of
  #   posts/<post_id>.html (nil when that file is absent)
  def post_list
    parse_csv('posts.csv').map do |post|
      post_id = post["post_id"]
      post["post_id"] = post_id.to_i
      # The HTML lookup uses the raw CSV value, not the integer form.
      post.merge(post_content(post_id))
    end
  end

  # @param post_id [String, Integer] identifier used in the file name
  # @return [Array<Hash>] rows of posts/<post_id>.opens.csv, or [] if absent
  def get_read_list_for_post(post_id)
    parse_csv("posts/#{post_id}.opens.csv")
  end

  # @param post_id [String, Integer] identifier used in the file name
  # @return [Array<Hash>] rows of posts/<post_id>.delivers.csv, or [] if absent
  def get_emails_sent_list_for_post(post_id)
    parse_csv("posts/#{post_id}.delivers.csv")
  end

  # @return [Array<Hash>] rows of every posts/*.opens.csv file, concatenated
  def read_list
    grouped_post_details('posts/*.opens.csv')
  end

  # @return [Array<Hash>] rows of every posts/*.delivers.csv file, concatenated
  def emails_sent_list
    grouped_post_details('posts/*.delivers.csv')
  end

  private

  # Opens the archive once and memoizes the handle for later lookups.
  def unzip_file
    @unzip_file ||= Zip::File.open(file_path)
  end

  # First archive entry whose name starts with "email_list", or nil.
  def email_list_file
    unzip_file.find { |entry| entry.name.start_with?("email_list") }
  end

  # Parses the named archive file as CSV with a header row.
  # Always returns an Array (empty when the file is missing) so callers can
  # treat the result uniformly.
  def parse_csv(filename)
    rows_from(get_zipped_file_content(filename))
  end

  # Wraps the text of posts/<post_id>.html in a hash ready to merge into a post.
  def post_content(post_id)
    { "content" => get_zipped_file_content("posts/#{post_id}.html") }
  end

  # Text of the first entry matching +filename+, or nil when nothing matches.
  def get_zipped_file_content(filename)
    entry = unzip_file.glob(filename).first
    # An explicit nil check replaces the former `rescue NoMethodError`, which
    # also swallowed unrelated NoMethodErrors raised while reading.
    entry.nil? ? nil : read_entry(entry)
  end

  # Parses every entry matching the glob +path+ and concatenates the rows.
  def grouped_post_details(path)
    unzip_file.glob(path).flat_map { |entry| rows_from(read_entry(entry)) }
  end

  # Reads an entry through the block form of get_input_stream so the stream
  # is closed as soon as the read finishes. The value is captured inside the
  # block because the method's own return value is not relied upon.
  def read_entry(entry)
    data = nil
    entry.get_input_stream { |stream| data = stream.read }
    data
  end

  # Converts CSV text (with a header row) into an array of row hashes;
  # nil content yields an empty array.
  def rows_from(content)
    return [] if content.nil?
    CSV.parse(content, headers: true).map(&:to_h)
  end

  # Checks the extension first, then existence. `to_s` lets nil or Pathname
  # input fail with the intended message instead of a NoMethodError.
  def validate_zip
    raise "File is not a zip" unless file_path.to_s.end_with?('.zip')
    raise "File not found" unless File.exist?(file_path)
  end
end
@@ -0,0 +1,24 @@
1
+
2
# Gem specification for substack_parser.
Gem::Specification.new do |s|
  repo_url = "https://github.com/betacraft/substack_parser"

  # Identity
  s.name    = "substack_parser"
  s.version = "0.0.1"
  s.authors = ["Mathew Thomas"]
  s.email   = ["mathew@betacraft.com"]

  # Description and licensing
  s.summary  = "This gem is to help parse Substack exports."
  s.homepage = repo_url
  s.license  = "MIT"
  s.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  s.metadata["homepage_uri"]    = repo_url
  s.metadata["source_code_uri"] = repo_url

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0")
  end
  s.files = tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are whatever tracked files live under exe/.
  s.bindir        = "exe"
  s.executables   = s.files.grep(%r{^exe/}) { |path| File.basename(path) }
  s.require_paths = ["lib"]

  # Runtime dependencies
  s.add_dependency "csv", "~> 3.2"
  s.add_dependency "rubyzip", "~> 1.2.1"
end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: substack_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Mathew Thomas
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2023-08-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csv
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rubyzip
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 1.2.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 1.2.1
41
+ description:
42
+ email:
43
+ - mathew@betacraft.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - ".rspec"
50
+ - ".ruby-gemset"
51
+ - ".ruby-version"
52
+ - ".travis.yml"
53
+ - Gemfile
54
+ - Gemfile.lock
55
+ - LICENSE.txt
56
+ - README.md
57
+ - Rakefile
58
+ - bin/console
59
+ - bin/setup
60
+ - lib/substack_parser.rb
61
+ - substack_parser.gemspec
62
+ homepage: https://github.com/betacraft/substack_parser
63
+ licenses:
64
+ - MIT
65
+ metadata:
66
+ homepage_uri: https://github.com/betacraft/substack_parser
67
+ source_code_uri: https://github.com/betacraft/substack_parser
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: 2.3.0
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubygems_version: 3.3.26
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: This gem is to help parse Substack exports.
87
+ test_files: []