bsky-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +7 -0
  2. data/lib/bsky_parser.rb +137 -0
  3. metadata +58 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7aec83d0a4eabb6449e51290505c4497ea2f7b4e9f43c42d775ab573bd8e52c7
4
+ data.tar.gz: 5cef2aabe26ed6123550b7f6bb25d20266ab5e5f01caff8fd2973ec23146e574
5
+ SHA512:
6
+ metadata.gz: 2bf88f931df9a6f9b664bb197afec3b51521bff88f612940f8f820d17a7b02bf412b3d83d0864dc412d4c6a241325f6a54dd24ac145118f37be79ed1decfb0fd
7
+ data.tar.gz: 3f197e06e7eaee4957c43cd693a076d8c5edd4e2943c61b5628f83f7e17a331ac9b10c8ab545bd6fccf8b6781b259dd326d75aec3d18877a3f2722d3bb8a8351
@@ -0,0 +1,137 @@
1
+ require "faraday"
2
+
3
# Parses post text and produces Bluesky rich text facets (links, hashtags,
# mentions). All facet indices are UTF-8 *byte* offsets into the returned
# text, as required by the app.bsky.richtext.facet lexicon.
#
# @example
#   text, facets = BskyParser.parse("Read [the docs](https://docs.bsky.app) #ruby")
#   text   # => "Read the docs #ruby"
#   facets # => [{ index: { byteStart: 5, byteEnd: 13 }, features: [...] }, ...]
class BskyParser
  class << self
    BASE_URL = "https://bsky.social"

    # URL body shared by the bare-URL and markdown-link patterns. Based on the
    # pattern from https://docs.bsky.app/docs/advanced-guides/post-richtext
    URL_SOURCE = 'https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b' \
                 '(?:[-a-zA-Z0-9()@:%_+.~#?&/=]*[-a-zA-Z0-9@%_+~#/=])?'.freeze

    # A bare URL at the start of the text or right after a non-word character.
    # The lookbehind keeps the delimiter out of the match so the facet covers
    # the URL only.
    URL_PATTERN = /(?<!\w)(?<url>#{URL_SOURCE})/

    # A markdown link: [text](url)
    MARKDOWN_LINK_PATTERN = /\[(?<text>[^\]]+)\]\((?<url>#{URL_SOURCE})\)/

    # Handle syntax based on: https://atproto.com/specs/handle#handle-identifier-syntax
    # The lookbehind rejects e-mail-like text ("me@example.com") while still
    # allowing a mention at the very start of the text.
    MENTION_PATTERN = /(?<!\w)@(?<handle>(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)/

    TAG_PATTERN = /#\S+/

    # Converts markdown links to plain text and collects every facet found.
    #
    # @param content [String] raw post text, optionally containing
    #   markdown links, #tags, @mentions and bare URLs
    # @return [Array(String, Array<Hash>)] the text with markdown links
    #   collapsed to their label, and the facets describing that text
    def parse(content)
      parsed_content, mkdown_facets = process_markdown_links(content)

      facets = mkdown_facets + tag_facets(parsed_content) + mention_facets(parsed_content) + url_facets(parsed_content)

      [parsed_content, facets]
    end

    private

    # Memoized HTTP connection used to resolve handles to DIDs.
    def conn
      @conn ||= Faraday.new(url: BASE_URL) do |f|
        f.request :json
      end
    end

    # @return [Array<MatchData>] every match of +pattern+ in +content+, in order
    def scan_matches(content, pattern)
      content.to_enum(:scan, pattern).map { Regexp.last_match }
    end

    # Byte offsets of the whole match. MatchData#offset counts characters,
    # which is wrong for facets as soon as the text contains non-ASCII.
    #
    # @return [Array(Integer, Integer)] [byte_start, byte_end)
    def byte_span(match)
      byte_start = match.pre_match.bytesize
      [byte_start, byte_start + match[0].bytesize]
    end

    # Builds one facet hash with a single feature.
    def build_facet(byte_start, byte_end, feature)
      {
        index: {
          byteStart: byte_start,
          byteEnd: byte_end
        },
        features: [feature]
      }
    end

    # @return [Array<Hash>] one tag facet per "#word" in +content+
    def tag_facets(content)
      scan_matches(content, TAG_PATTERN).map do |match|
        byte_start, byte_end = byte_span(match)
        tag = match[0][1..-1] # Trim leading hashtag
        build_facet(byte_start, byte_end, "$type": "app.bsky.richtext.facet#tag", tag: tag)
      end
    end

    # Resolves a handle through com.atproto.identity.resolveHandle.
    #
    # @return [String, nil] the DID, or nil when the handle does not resolve
    #   or the response is not a JSON object
    def resolve_did(handle)
      resp = conn.get("/xrpc/com.atproto.identity.resolveHandle", { handle: handle })
      payload = JSON.parse(resp.body.to_s)
      payload.is_a?(Hash) ? payload["did"] : nil
    rescue JSON::ParserError
      nil
    end

    # Mentions whose handle cannot be resolved are skipped: a mention facet
    # without a DID is not valid.
    #
    # @return [Array<Hash>] one mention facet per resolvable "@handle"
    def mention_facets(content)
      scan_matches(content, MENTION_PATTERN).each_with_object([]) do |match, facets|
        handle_did = resolve_did(match[:handle])
        next unless handle_did

        byte_start, byte_end = byte_span(match)
        facets << build_facet(byte_start, byte_end, "$type": "app.bsky.richtext.facet#mention", did: handle_did)
      end
    end

    # @return [Array<Hash>] the markdown links in +content+ as
    #   { text:, link:, match: } hashes, in document order
    def mkdown_links(content)
      scan_matches(content, MARKDOWN_LINK_PATTERN).map do |match|
        {
          text: match[:text],
          link: match[:url],
          match: match.to_s
        }
      end
    end

    # Replaces every "[text](url)" with "text" and emits a link facet for it.
    #
    # The output is built left to right so that each facet is measured against
    # the text as it will finally appear; measuring before earlier links are
    # collapsed would leave later facets pointing past their label.
    #
    # @return [Array(String, Array<Hash>)] rewritten text and its link facets,
    #   in document order
    def process_markdown_links(content)
      facets = []
      result_text = String.new(encoding: content.encoding)
      cursor = 0 # character index in content just past the previous link

      scan_matches(content, MARKDOWN_LINK_PATTERN).each do |match|
        result_text << content[cursor...match.begin(0)]
        byte_start = result_text.bytesize
        result_text << match[:text]
        facets << build_facet(byte_start, result_text.bytesize, "$type": "app.bsky.richtext.facet#link", uri: match[:url])
        cursor = match.end(0)
      end
      result_text << content[cursor..-1]

      [result_text, facets]
    end

    # @return [Array<Hash>] one link facet per bare URL in +content+
    def url_facets(content)
      scan_matches(content, URL_PATTERN).map do |match|
        byte_start, byte_end = byte_span(match)
        build_facet(byte_start, byte_end, "$type": "app.bsky.richtext.facet#link", uri: match[:url])
      end
    end
  end
end
137
+
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bsky-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Jonathan Yeong
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2025-02-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 2.12.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 2.12.2
27
+ description:
28
+ email: hey@jonathanyeong.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/bsky_parser.rb
34
+ homepage: https://rubygems.org/gems/bsky_parser
35
+ licenses:
36
+ - MIT
37
+ metadata:
38
+ source_code_uri: https://github.com/jonathanyeong/bsky-parser
39
+ post_install_message:
40
+ rdoc_options: []
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 3.3.6
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements: []
54
+ rubygems_version: 3.5.22
55
+ signing_key:
56
+ specification_version: 4
57
+ summary: Parses text and generates Bluesky rich text facets
58
+ test_files: []