bsky-parser 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7aec83d0a4eabb6449e51290505c4497ea2f7b4e9f43c42d775ab573bd8e52c7
4
- data.tar.gz: 5cef2aabe26ed6123550b7f6bb25d20266ab5e5f01caff8fd2973ec23146e574
3
+ metadata.gz: 9644188145dd0c8339bac95a664eb8571b4e0491ecb4a6caecca335e8cbeeee7
4
+ data.tar.gz: d79bf56d7329841971cc3e2d1d5149216603d3bdd863212a048e96835c49de2f
5
5
  SHA512:
6
- metadata.gz: 2bf88f931df9a6f9b664bb197afec3b51521bff88f612940f8f820d17a7b02bf412b3d83d0864dc412d4c6a241325f6a54dd24ac145118f37be79ed1decfb0fd
7
- data.tar.gz: 3f197e06e7eaee4957c43cd693a076d8c5edd4e2943c61b5628f83f7e17a331ac9b10c8ab545bd6fccf8b6781b259dd326d75aec3d18877a3f2722d3bb8a8351
6
+ metadata.gz: 378e2e528768da19d162f68ffdbf0f48e4fd3d1f4a47fab457e6a643707314f9b4422b4210d169004c3d59661a839bfd6a8b68d44e2eaa4e963c779e103a40e3
7
+ data.tar.gz: f33448b0ac7a7cb653e020ddda3c3a67ff3d1fa0a48133bd4449567398298b3b8d4f3783b3eb32816abbf28a36aee30c4da040839e3430a63eda7f95c5b9239b
@@ -0,0 +1,25 @@
1
+ // For format details, see https://aka.ms/devcontainer.json. For config options, see the
2
+ // README at: https://github.com/devcontainers/templates/tree/main/src/ruby
3
+ {
4
+ "name": "Ruby",
5
+ // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
6
+ "image": "mcr.microsoft.com/devcontainers/ruby:1-3.3-bullseye",
7
+ "features": {
8
+ "ghcr.io/devcontainers/features/git:1": {}
9
+ }
10
+
11
+ // Features to add to the dev container. More info: https://containers.dev/features.
12
+ // "features": {},
13
+
14
+ // Use 'forwardPorts' to make a list of ports inside the container available locally.
15
+ // "forwardPorts": [],
16
+
17
+ // Use 'postCreateCommand' to run commands after the container is created.
18
+ // "postCreateCommand": "ruby --version",
19
+
20
+ // Configure tool-specific properties.
21
+ // "customizations": {},
22
+
23
+ // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
24
+ // "remoteUser": "root"
25
+ }
data/.rubocop.yml ADDED
@@ -0,0 +1,47 @@
1
+ AllCops:
2
+ TargetRubyVersion: 3.3
3
+ NewCops: enable
4
+
5
+ Style/StringLiterals:
6
+ EnforcedStyle: double_quotes
7
+
8
+ Style/StringLiteralsInInterpolation:
9
+ EnforcedStyle: double_quotes
10
+
11
+ Style/Documentation:
12
+ Enabled: false
13
+
14
+ Style/FrozenStringLiteralComment:
15
+ Enabled: true
16
+ EnforcedStyle: always
17
+
18
+ Metrics/MethodLength:
19
+ Max: 20
20
+ Exclude:
21
+ - "test/**/*"
22
+
23
+ Metrics/AbcSize:
24
+ Max: 30
25
+ Exclude:
26
+ - 'test/**/*'
27
+
28
+ Metrics/ClassLength:
29
+ Enabled: true
30
+ Max: 150
31
+ CountComments: false
32
+ Exclude:
33
+ - "test/**/*"
34
+ - "spec/**/*"
35
+
36
+ Metrics/ModuleLength:
37
+ Enabled: true
38
+ Max: 150
39
+ CountComments: false
40
+ Exclude:
41
+ - "test/**/*"
42
+ - "spec/**/*"
43
+
44
+
45
+ plugins:
46
+ - rubocop-rake
47
+ - rubocop-minitest
data/CHANGELOG.md ADDED
@@ -0,0 +1,23 @@
1
+ ## [Unreleased]
2
+
3
+ ## [1.0.2] - 2025-02-25
4
+
5
+ ### Fixed
6
+
7
+ - Fix tag facet regex. Previously, it would match this entire string: `#hello!`. Now it correctly matches `#hello` without the `!`.
8
+ - Regex patterns for mentions, tags, and URL facets correctly match when they appear at the beginning of text without requiring a leading space. For example, `"#hello"` is now properly detected as a tag, while mid-word occurrences like `"hello#hello"` are still ignored.
9
+ - URL facet has been fixed to not match if it occurs mid-word. E.g. `hellohttps://example.com` no longer matches.
10
+ - Correctly compute facet indices so that they account for leading whitespace when the text contains multiple matches.
11
+
12
+ ## [1.0.1] - 2025-02-19
13
+
14
+ ### Added
15
+ - More files from the `bundle gem` generator. Originally, I followed the RubyGems guide, which listed fewer files.
16
+ - Added LICENSE
17
+
18
+ ### Fixed
19
+ - Fixed Rubocop linting rule errors.
20
+
21
+ ## [1.0.0] - 2025-02-19
22
+
23
+ - Initial Release.
@@ -0,0 +1,132 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, caste, color, religion, or sexual
10
+ identity and orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment for our
18
+ community include:
19
+
20
+ * Demonstrating empathy and kindness toward other people
21
+ * Being respectful of differing opinions, viewpoints, and experiences
22
+ * Giving and gracefully accepting constructive feedback
23
+ * Accepting responsibility and apologizing to those affected by our mistakes,
24
+ and learning from the experience
25
+ * Focusing on what is best not just for us as individuals, but for the overall
26
+ community
27
+
28
+ Examples of unacceptable behavior include:
29
+
30
+ * The use of sexualized language or imagery, and sexual attention or advances of
31
+ any kind
32
+ * Trolling, insulting or derogatory comments, and personal or political attacks
33
+ * Public or private harassment
34
+ * Publishing others' private information, such as a physical or email address,
35
+ without their explicit permission
36
+ * Other conduct which could reasonably be considered inappropriate in a
37
+ professional setting
38
+
39
+ ## Enforcement Responsibilities
40
+
41
+ Community leaders are responsible for clarifying and enforcing our standards of
42
+ acceptable behavior and will take appropriate and fair corrective action in
43
+ response to any behavior that they deem inappropriate, threatening, offensive,
44
+ or harmful.
45
+
46
+ Community leaders have the right and responsibility to remove, edit, or reject
47
+ comments, commits, code, wiki edits, issues, and other contributions that are
48
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
49
+ decisions when appropriate.
50
+
51
+ ## Scope
52
+
53
+ This Code of Conduct applies within all community spaces, and also applies when
54
+ an individual is officially representing the community in public spaces.
55
+ Examples of representing our community include using an official email address,
56
+ posting via an official social media account, or acting as an appointed
57
+ representative at an online or offline event.
58
+
59
+ ## Enforcement
60
+
61
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
62
+ reported to the community leaders responsible for enforcement at
63
+ hey@jonathanyeong.com.
64
+ All complaints will be reviewed and investigated promptly and fairly.
65
+
66
+ All community leaders are obligated to respect the privacy and security of the
67
+ reporter of any incident.
68
+
69
+ ## Enforcement Guidelines
70
+
71
+ Community leaders will follow these Community Impact Guidelines in determining
72
+ the consequences for any action they deem in violation of this Code of Conduct:
73
+
74
+ ### 1. Correction
75
+
76
+ **Community Impact**: Use of inappropriate language or other behavior deemed
77
+ unprofessional or unwelcome in the community.
78
+
79
+ **Consequence**: A private, written warning from community leaders, providing
80
+ clarity around the nature of the violation and an explanation of why the
81
+ behavior was inappropriate. A public apology may be requested.
82
+
83
+ ### 2. Warning
84
+
85
+ **Community Impact**: A violation through a single incident or series of
86
+ actions.
87
+
88
+ **Consequence**: A warning with consequences for continued behavior. No
89
+ interaction with the people involved, including unsolicited interaction with
90
+ those enforcing the Code of Conduct, for a specified period of time. This
91
+ includes avoiding interactions in community spaces as well as external channels
92
+ like social media. Violating these terms may lead to a temporary or permanent
93
+ ban.
94
+
95
+ ### 3. Temporary Ban
96
+
97
+ **Community Impact**: A serious violation of community standards, including
98
+ sustained inappropriate behavior.
99
+
100
+ **Consequence**: A temporary ban from any sort of interaction or public
101
+ communication with the community for a specified period of time. No public or
102
+ private interaction with the people involved, including unsolicited interaction
103
+ with those enforcing the Code of Conduct, is allowed during this period.
104
+ Violating these terms may lead to a permanent ban.
105
+
106
+ ### 4. Permanent Ban
107
+
108
+ **Community Impact**: Demonstrating a pattern of violation of community
109
+ standards, including sustained inappropriate behavior, harassment of an
110
+ individual, or aggression toward or disparagement of classes of individuals.
111
+
112
+ **Consequence**: A permanent ban from any sort of public interaction within the
113
+ community.
114
+
115
+ ## Attribution
116
+
117
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118
+ version 2.1, available at
119
+ [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
120
+
121
+ Community Impact Guidelines were inspired by
122
+ [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
123
+
124
+ For answers to common questions about this code of conduct, see the FAQ at
125
+ [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
126
+ [https://www.contributor-covenant.org/translations][translations].
127
+
128
+ [homepage]: https://www.contributor-covenant.org
129
+ [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
130
+ [Mozilla CoC]: https://github.com/mozilla/diversity
131
+ [FAQ]: https://www.contributor-covenant.org/faq
132
+ [translations]: https://www.contributor-covenant.org/translations
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Jonathan Yeong
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # Bsky Parser
2
+
3
+ Gem that will parse text content and generate Bluesky rich text facets.
4
+
5
+ Facets supported:
6
+
7
+ - Mentions aka @handles
8
+ - Hashtags
9
+ - URLs as well as markdown-style links
10
+
11
+ ## Installation
12
+
13
+ Install the gem and add to the application's Gemfile by executing:
14
+
15
+ ```bash
16
+ $ bundle add bsky-parser
17
+ $ bundle install
18
+ ```
19
+
20
+ ## Usage
21
+
22
+ The gem provides a simple interface to parse text content:
23
+
24
+ ```ruby
25
+ content = "Check out this blog post [My Blog](https://example.com) and follow @handle.bsky.social! #ruby"
26
+ parsed_content, facets = BskyParser.parse(content)
27
+
28
+ # Example usage:
29
+ # request_body = {
30
+ # repo: user_did,
31
+ # collection: "app.bsky.feed.post",
32
+ # record: {
33
+ # text: parsed_content,
34
+ # facets: facets,
35
+ # createdAt: current_time,
36
+ # "$type": "app.bsky.feed.post"
37
+ # }
38
+ # }
39
+ ```
40
+ ## Development
41
+
42
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
43
+
44
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
45
+
46
+ ## Contributing
47
+
48
+ Bug reports and pull requests are welcome. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/jonathanyeong/bsky-parser/blob/main/CODE_OF_CONDUCT.md).
49
+
50
+ 1. Fork it
51
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
52
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
53
+ 4. Push to the branch (`git push origin my-new-feature`)
54
+ 5. Create new Pull Request
55
+
56
+ ## License
57
+
58
+ The gem is available as open source under the terms of the [MIT License](https://github.com/jonathanyeong/bsky-parser/blob/main/LICENSE).
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "minitest/test_task"
5
+
6
+ Minitest::TestTask.create
7
+
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[test rubocop]
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BskyParser
4
+ module Facets
5
+ class BaseFacet
6
+ def self.process(content)
7
+ new(content).process
8
+ end
9
+
10
+ attr_reader :content
11
+
12
+ def initialize(content)
13
+ @content = content
14
+ end
15
+
16
+ def process
17
+ raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_facet"
4
+
5
+ module BskyParser
6
+ module Facets
7
+ class MarkdownLinkFacet < BaseFacet
8
+ # Override class method to return both modified content and facets
9
+ def self.process(content)
10
+ new(content).process
11
+ end
12
+
13
+ def process
14
+ facets = []
15
+ result_text = content.dup
16
+ links = find_markdown_links
17
+
18
+ links.each do |link|
19
+ start_pos = result_text.index(link[:match])
20
+ next unless start_pos
21
+
22
+ end_pos = start_pos + link[:match].length
23
+
24
+ # Replace markdown syntax with just the text
25
+ result_text[start_pos...end_pos] = link[:text]
26
+
27
+ facets << {
28
+ index: {
29
+ byteStart: start_pos,
30
+ byteEnd: start_pos + link[:text].length
31
+ },
32
+ features: [{
33
+ "$type": "app.bsky.richtext.facet#link",
34
+ uri: link[:link]
35
+ }]
36
+ }
37
+ end
38
+
39
+ [result_text, facets]
40
+ end
41
+
42
+ private
43
+
44
+ def url_pattern
45
+ # This URL pattern differs from the URL facet's URL pattern because
46
+ # we don't want to mix named and numbered capture groups.
47
+ # Instead we convert the numbered to non-capturing groups `?:`
48
+ %r{
49
+ \[
50
+ (?<text>[^\]]+) # The link text inside square brackets
51
+ \]
52
+ \(
53
+ (?<url>
54
+ https?:// # http:// or https://
55
+ (?:www\.)? # Optional www.
56
+ [-a-zA-Z0-9@:%._\+~#=]{1,256} # Domain name
57
+ \.
58
+ [a-zA-Z0-9()]{1,6} # TLD
59
+ \b
60
+ (?:[-a-zA-Z0-9()@:%_\+.~#?&/=]* # URL path, params, etc.
61
+ [-a-zA-Z0-9@%_\+~#/=])?
62
+ )
63
+ \)
64
+ }x
65
+ end
66
+
67
+ def find_markdown_links
68
+ content.to_enum(:scan, url_pattern).map do
69
+ match = Regexp.last_match
70
+ {
71
+ text: match[:text],
72
+ link: match[:url],
73
+ match: match.to_s
74
+ }
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_facet"
4
+
5
+ module BskyParser
6
+ module Facets
7
+ class MentionFacet < BaseFacet
8
+ BASE_URL = "https://bsky.social"
9
+
10
+ def process
11
+ facets = []
12
+ # regex based on: https://atproto.com/specs/handle#handle-identifier-syntax
13
+ matches = content.to_enum(:scan, mention_pattern).map do
14
+ match = Regexp.last_match
15
+ start_offset = match[1]&.length || 0
16
+
17
+ {
18
+ handle: match[0],
19
+ indices: [match.begin(0) + start_offset, match.end(0)]
20
+ }
21
+ end
22
+
23
+ matches.each do |match|
24
+ handle = match[:handle].to_s.strip[1..] # Trim leading @
25
+ indices = match[:indices]
26
+ did = fetch_did(handle)
27
+ next if did.nil?
28
+
29
+ facets << build_facet(indices, did)
30
+ end
31
+ facets
32
+ end
33
+
34
+ private
35
+
36
+ def conn
37
+ @conn ||= Faraday.new(url: BASE_URL) do |f|
38
+ f.request :json
39
+ end
40
+ end
41
+
42
+ def fetch_did(handle)
43
+ resp = conn.get("/xrpc/com.atproto.identity.resolveHandle", { handle: handle })
44
+ JSON.parse(resp.body)["did"] if resp.success?
45
+ rescue Faraday::Error
46
+ # TODO: Introduce logging
47
+ nil
48
+ end
49
+
50
+ def mention_pattern
51
+ /(^|\s)(@([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)/
52
+ end
53
+
54
+ def build_facet(indices, handle_did)
55
+ {
56
+ index: {
57
+ byteStart: indices[0],
58
+ byteEnd: indices[1]
59
+ },
60
+ features: [{
61
+ "$type": "app.bsky.richtext.facet#mention",
62
+ did: handle_did
63
+ }]
64
+ }
65
+ end
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_facet"
4
+
5
+ module BskyParser
6
+ module Facets
7
+ class TagFacet < BaseFacet
8
+ def process
9
+ facets = []
10
+ tag_pattern = /(^|\s)#[\w-]+/
11
+ matches = content.to_enum(:scan, tag_pattern).map do
12
+ match = Regexp.last_match
13
+ # If there's a space before the hashtag (match[1] contains a space),
14
+ # adjust the start position by adding 1
15
+ start_offset = match[1]&.length || 0
16
+
17
+ {
18
+ tag: match[0],
19
+ indices: [match.begin(0) + start_offset, match.end(0)]
20
+ }
21
+ end
22
+
23
+ matches.each do |match|
24
+ tag = match[:tag].to_s.lstrip[1..] # Trim leading space and hashtag
25
+ indices = match[:indices]
26
+ facets << build_facet(indices, tag)
27
+ end
28
+
29
+ facets
30
+ end
31
+
32
+ private
33
+
34
+ def build_facet(indices, tag)
35
+ {
36
+ index: {
37
+ byteStart: indices[0],
38
+ byteEnd: indices[1]
39
+ },
40
+ features: [{
41
+ "$type": "app.bsky.richtext.facet#tag",
42
+ tag: tag
43
+ }]
44
+ }
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_facet"
4
+
5
+ module BskyParser
6
+ module Facets
7
+ class URLFacet < BaseFacet
8
+ def process
9
+ facets = []
10
+
11
+ matches = content.to_enum(:scan, url_pattern).map do
12
+ match = Regexp.last_match
13
+ # Handles multiple urls
14
+ start_offset = match[1]&.length || 0
15
+
16
+ {
17
+ url: match[0],
18
+ indices: [match.begin(0) + start_offset, match.end(0)]
19
+ }
20
+ end
21
+
22
+ matches.each do |match|
23
+ url = match[:url].to_s.lstrip
24
+ indices = match[:indices]
25
+ facets << build_facet(indices, url)
26
+ end
27
+ facets
28
+ end
29
+
30
+ private
31
+
32
+ def url_pattern
33
+ # URI::RFC2396_PARSER.make_regexp has a complex regex with multiple capture groups
34
+ # Instead, use the URL pattern from https://docs.bsky.app/docs/advanced-guides/post-richtext
35
+ %r{
36
+ (^|\s)
37
+ (https?://
38
+ (www\.)?
39
+ [-a-zA-Z0-9@:%._\+~#=]{1,256}
40
+ \.
41
+ [a-zA-Z0-9()]{1,6}\b
42
+ ([-a-zA-Z0-9()@:%_\+.~#?&/=]*
43
+ [-a-zA-Z0-9@%_\+~#/=])?)
44
+ }x
45
+ end
46
+
47
+ def build_facet(indices, url)
48
+ {
49
+ index: {
50
+ byteStart: indices[0],
51
+ byteEnd: indices[1]
52
+ },
53
+ features: [{
54
+ "$type": "app.bsky.richtext.facet#link",
55
+ uri: url
56
+ }]
57
+ }
58
+ end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BskyParser
4
+ VERSION = "1.0.2"
5
+ end
data/lib/bsky_parser.rb CHANGED
@@ -1,137 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "bsky_parser/version"
4
+ require_relative "bsky_parser/facets/markdown_link_facet"
5
+ require_relative "bsky_parser/facets/tag_facet"
6
+ require_relative "bsky_parser/facets/url_facet"
7
+ require_relative "bsky_parser/facets/mention_facet"
8
+
1
9
  require "faraday"
2
10
 
3
- class BskyParser
11
+ module BskyParser
4
12
  class << self
5
- BASE_URL = "https://bsky.social"
6
-
7
13
  def parse(content)
8
- parsed_content, mkdown_facets = process_markdown_links(content)
9
-
10
- facets = mkdown_facets + tag_facets(parsed_content) + mention_facets(parsed_content) + url_facets(parsed_content)
11
-
12
- [ parsed_content, facets]
13
- end
14
-
15
- private
16
-
17
- def conn
18
- @conn ||= Faraday.new(url: BASE_URL) do |f|
19
- f.request :json
20
- end
21
- end
22
-
23
- def tag_facets(content)
24
- facets = []
25
- tag_pattern = /#\S+/
26
- matches = content.to_enum(:scan, tag_pattern).map { { tag: Regexp.last_match, indices: Regexp.last_match.offset(0) } }
27
- matches.each do |match|
28
- tag = match[:tag].to_s[1..-1] # Trim leading hashtag
29
- indices = match[:indices]
30
- facets << {
31
- index: {
32
- byteStart: indices[0],
33
- byteEnd: indices[1]
34
- },
35
- features: [ {
36
- "$type": "app.bsky.richtext.facet#tag",
37
- tag: tag
38
- } ]
39
- }
40
- end
41
- facets
42
- end
43
-
44
- def mention_facets(content)
45
- facets = []
46
- # regex based on: https://atproto.com/specs/handle#handle-identifier-syntax
47
- mention_pattern = /[$|\W](@([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)/
48
- matches = content.to_enum(:scan, mention_pattern).map { { handle: Regexp.last_match, indices: Regexp.last_match.offset(0) } }
49
- matches.each do |match|
50
- handle = match[:handle].to_s.strip[1..-1] # Trim leading @
51
- indices = match[:indices]
52
- resp = conn.get("/xrpc/com.atproto.identity.resolveHandle", { handle: handle })
53
- handle_did = JSON.parse(resp.body)["did"]
54
- facets << {
55
- index: {
56
- byteStart: indices[0],
57
- byteEnd: indices[1]
58
- },
59
- features: [ {
60
- "$type": "app.bsky.richtext.facet#mention",
61
- did: handle_did
62
- } ]
63
- }
64
- end
65
- facets
66
- end
67
-
68
- def mkdown_links(content)
69
- mkdown_links = []
70
- url_pattern = /\[(?<text>[^\]]+)\]\((?<url>https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&\/\/=]*[-a-zA-Z0-9@%_\+~#\/\/=])?)\)/
71
- matches = content.to_enum(:scan, url_pattern).map { Regexp.last_match }
72
-
73
- mkdown_links = []
74
- matches.each do |match|
75
- mkdown_links << {
76
- text: match[:text],
77
- link: match[:url],
78
- match: match.to_s
79
- }
80
- end
81
- mkdown_links
82
- end
83
-
84
- def process_markdown_links(content)
85
- facets = []
86
- result_text = content.dup
87
-
88
- links = mkdown_links(content)
14
+ parsed_content, mkdown_facets = Facets::MarkdownLinkFacet.process(content)
89
15
 
90
- links.reverse_each do |link|
91
- start_pos = result_text.index(link[:match])
16
+ facets =
17
+ mkdown_facets +
18
+ Facets::TagFacet.process(parsed_content) +
19
+ Facets::MentionFacet.process(parsed_content) +
20
+ Facets::URLFacet.process(parsed_content)
92
21
 
93
- if start_pos
94
- end_pos = start_pos + link[:match].length
95
-
96
- facets << {
97
- index: {
98
- byteStart: start_pos,
99
- byteEnd: start_pos + link[:text].length
100
- },
101
- features: [ {
102
- "$type": "app.bsky.richtext.facet#link",
103
- uri: link[:link]
104
- } ]
105
- }
106
-
107
- result_text[start_pos...end_pos] = link[:text]
108
- end
109
- end
110
-
111
- [ result_text, facets ]
112
- end
113
-
114
- def url_facets(content)
115
- facets = []
116
- # URL pattern from https://docs.bsky.app/docs/advanced-guides/post-richtext
117
- url_pattern = /([$|\W])(https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&\/\/=]*[-a-zA-Z0-9@%_\+~#\/\/=])?)/
118
- matches = content.to_enum(:scan, url_pattern).map { { url: Regexp.last_match, indices: Regexp.last_match.offset(0) } }
119
- matches.each do |match|
120
- url = match[:url].to_s[1..-1]
121
- indices = match[:indices]
122
- facets << {
123
- index: {
124
- byteStart: indices[0],
125
- byteEnd: indices[1]
126
- },
127
- features: [ {
128
- "$type": "app.bsky.richtext.facet#link",
129
- uri: url
130
- } ]
131
- }
132
- end
133
- facets
22
+ [parsed_content, facets]
134
23
  end
135
24
  end
136
25
  end
137
-
@@ -0,0 +1,4 @@
1
+ module BskyParser
2
+ VERSION: String
3
+ # See the writing guide of rbs: https://github.com/ruby/rbs#guides
4
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bsky-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Yeong
8
8
  autorequire:
9
- bindir: bin
9
+ bindir: exe
10
10
  cert_chain: []
11
- date: 2025-02-19 00:00:00.000000000 Z
11
+ date: 2025-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -30,12 +30,29 @@ executables: []
30
30
  extensions: []
31
31
  extra_rdoc_files: []
32
32
  files:
33
+ - ".devcontainer/devcontainer.json"
34
+ - ".rubocop.yml"
35
+ - CHANGELOG.md
36
+ - CODE_OF_CONDUCT.md
37
+ - LICENSE
38
+ - README.md
39
+ - Rakefile
33
40
  - lib/bsky_parser.rb
34
- homepage: https://rubygems.org/gems/bsky_parser
41
+ - lib/bsky_parser/facets/base_facet.rb
42
+ - lib/bsky_parser/facets/markdown_link_facet.rb
43
+ - lib/bsky_parser/facets/mention_facet.rb
44
+ - lib/bsky_parser/facets/tag_facet.rb
45
+ - lib/bsky_parser/facets/url_facet.rb
46
+ - lib/bsky_parser/version.rb
47
+ - sig/bsky_parser.rbs
48
+ homepage: https://github.com/jonathanyeong/bsky-parser
35
49
  licenses:
36
50
  - MIT
37
51
  metadata:
52
+ homepage_uri: https://github.com/jonathanyeong/bsky-parser
38
53
  source_code_uri: https://github.com/jonathanyeong/bsky-parser
54
+ bug_tracker_uri: https://github.com/jonathanyeong/bsky-parser/issues
55
+ rubygems_mfa_required: 'true'
39
56
  post_install_message:
40
57
  rdoc_options: []
41
58
  require_paths: