bsky-parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/bsky_parser.rb +137 -0
- metadata +58 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7aec83d0a4eabb6449e51290505c4497ea2f7b4e9f43c42d775ab573bd8e52c7
|
4
|
+
data.tar.gz: 5cef2aabe26ed6123550b7f6bb25d20266ab5e5f01caff8fd2973ec23146e574
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2bf88f931df9a6f9b664bb197afec3b51521bff88f612940f8f820d17a7b02bf412b3d83d0864dc412d4c6a241325f6a54dd24ac145118f37be79ed1decfb0fd
|
7
|
+
data.tar.gz: 3f197e06e7eaee4957c43cd693a076d8c5edd4e2943c61b5628f83f7e17a331ac9b10c8ab545bd6fccf8b6781b259dd326d75aec3d18877a3f2722d3bb8a8351
|
data/lib/bsky_parser.rb
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
require "json"
require "faraday"
|
2
|
+
|
3
|
+
# Converts post text into the pair Bluesky expects when creating a post:
# the plain text plus a list of rich text facets (links, hashtags, mentions).
#
# All facet positions are UTF-8 *byte* offsets into the returned text, as
# required by the `app.bsky.richtext.facet` lexicon.
class BskyParser
  class << self
    BASE_URL = "https://bsky.social"

    # A hashtag: "#" followed by any run of non-whitespace characters.
    TAG_PATTERN = /#\S+/

    # "@handle", based on https://atproto.com/specs/handle#handle-identifier-syntax.
    # The lookbehind rejects a preceding word character (so e-mail addresses are
    # not mentions) without consuming it, which also lets a mention match at the
    # very start of the text.
    MENTION_PATTERN = /(?<!\w)@(?<handle>(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)/

    # URL pattern from https://docs.bsky.app/docs/advanced-guides/post-richtext
    URL_PATTERN = /https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*[-a-zA-Z0-9@%_\+~#\/=])?/

    # A URL that is not glued to a preceding word character.
    BARE_URL_PATTERN = /(?<!\w)#{URL_PATTERN}/

    # A markdown link: [text](url).
    MARKDOWN_LINK_PATTERN = /\[(?<text>[^\]]+)\]\((?<url>#{URL_PATTERN})\)/

    # Parses post content into plain text and rich text facets.
    #
    # Markdown links are collapsed to their link text first; hashtags, mentions
    # and bare URLs are then located in the collapsed text.
    #
    # @param content [String] raw post text, optionally containing markdown links
    # @return [Array(String, Array<Hash>)] the collapsed text and its facets
    def parse(content)
      parsed_content, mkdown_facets = process_markdown_links(content)

      facets = mkdown_facets + tag_facets(parsed_content) + mention_facets(parsed_content) + url_facets(parsed_content)

      [parsed_content, facets]
    end

    private

    # Memoized Faraday connection to the Bluesky API host.
    def conn
      @conn ||= Faraday.new(url: BASE_URL) do |f|
        f.request :json
      end
    end

    # Returns every MatchData for +pattern+ in +content+, in document order.
    def scan_matches(content, pattern)
      content.to_enum(:scan, pattern).map { Regexp.last_match }
    end

    # Builds one facet hash covering the given byte range.
    def build_facet(byte_start, byte_end, feature)
      {
        index: {
          byteStart: byte_start,
          byteEnd: byte_end
        },
        features: [feature]
      }
    end

    # Builds a facet covering the whole of +match+. The position is measured in
    # bytes (not characters) so multibyte text before the match is accounted for.
    def match_facet(match, feature)
      byte_start = match.pre_match.bytesize
      build_facet(byte_start, byte_start + match[0].bytesize, feature)
    end

    # Resolves a handle to its DID through com.atproto.identity.resolveHandle.
    # Returns nil when the lookup is unsuccessful or the body carries no DID.
    def resolve_handle_did(handle)
      resp = conn.get("/xrpc/com.atproto.identity.resolveHandle", { handle: handle })
      return nil unless resp.success?

      JSON.parse(resp.body.to_s)["did"]
    rescue JSON::ParserError
      nil
    end

    # @param content [String]
    # @return [Array<Hash>] one tag facet per hashtag; the tag value omits the "#"
    def tag_facets(content)
      scan_matches(content, TAG_PATTERN).map do |match|
        match_facet(match, { "$type": "app.bsky.richtext.facet#tag", tag: match[0][1..] })
      end
    end

    # @param content [String]
    # @return [Array<Hash>] one mention facet per "@handle" that resolves to a
    #   DID; handles that cannot be resolved are left as plain text
    def mention_facets(content)
      scan_matches(content, MENTION_PATTERN).filter_map do |match|
        handle_did = resolve_handle_did(match[:handle])
        next unless handle_did

        match_facet(match, { "$type": "app.bsky.richtext.facet#mention", did: handle_did })
      end
    end

    # @param content [String]
    # @return [Array<Hash>] one entry per markdown link, in document order, with
    #   :text, :link, :match (the full "[text](url)" source) and :char_range
    #   (character range of the match within +content+)
    def mkdown_links(content)
      scan_matches(content, MARKDOWN_LINK_PATTERN).map do |match|
        {
          text: match[:text],
          link: match[:url],
          match: match.to_s,
          char_range: match.begin(0)...match.end(0)
        }
      end
    end

    # Replaces each markdown link with its link text and emits a link facet
    # pointing at that text.
    #
    # The output is rebuilt left to right, so every facet offset is taken from
    # the final text; facets are returned in document order.
    #
    # @param content [String]
    # @return [Array(String, Array<Hash>)] the collapsed text and its link facets
    def process_markdown_links(content)
      facets = []
      result_text = String.new(encoding: content.encoding)
      cursor = 0 # character index of the first not-yet-copied char of content

      mkdown_links(content).each do |link|
        span = link[:char_range]
        result_text << content[cursor...span.begin]

        byte_start = result_text.bytesize
        result_text << link[:text]
        facets << build_facet(
          byte_start,
          result_text.bytesize,
          { "$type": "app.bsky.richtext.facet#link", uri: link[:link] }
        )

        cursor = span.end
      end

      result_text << content[cursor..]
      [result_text, facets]
    end

    # @param content [String]
    # @return [Array<Hash>] one link facet per bare URL
    def url_facets(content)
      scan_matches(content, BARE_URL_PATTERN).map do |match|
        match_facet(match, { "$type": "app.bsky.richtext.facet#link", uri: match[0] })
      end
    end
  end
end
|
137
|
+
|
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bsky-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jonathan Yeong
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2025-02-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: faraday
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.12.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.12.2
|
27
|
+
description:
|
28
|
+
email: hey@jonathanyeong.com
|
29
|
+
executables: []
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- lib/bsky_parser.rb
|
34
|
+
homepage: https://rubygems.org/gems/bsky_parser
|
35
|
+
licenses:
|
36
|
+
- MIT
|
37
|
+
metadata:
|
38
|
+
source_code_uri: https://github.com/jonathanyeong/bsky-parser
|
39
|
+
post_install_message:
|
40
|
+
rdoc_options: []
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 3.3.6
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
requirements: []
|
54
|
+
rubygems_version: 3.5.22
|
55
|
+
signing_key:
|
56
|
+
specification_version: 4
|
57
|
+
summary: Parses text and generates Bluesky rich text facets
|
58
|
+
test_files: []
|