docsmith 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rspec_status +212 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE.txt +21 -0
- data/README.md +66 -0
- data/Rakefile +8 -0
- data/USAGE.md +510 -0
- data/docs/superpowers/plans/2026-04-01-docsmith-full-plan.md +6459 -0
- data/docs/superpowers/plans/2026-04-08-parsers-remove-branches-docs.md +2112 -0
- data/docs/superpowers/specs/2026-04-01-docsmith-phase1-design.md +340 -0
- data/docsmith_spec.md +630 -0
- data/lib/docsmith/auto_save.rb +29 -0
- data/lib/docsmith/comments/anchor.rb +68 -0
- data/lib/docsmith/comments/comment.rb +44 -0
- data/lib/docsmith/comments/manager.rb +73 -0
- data/lib/docsmith/comments/migrator.rb +64 -0
- data/lib/docsmith/configuration.rb +95 -0
- data/lib/docsmith/diff/engine.rb +39 -0
- data/lib/docsmith/diff/parsers/html.rb +64 -0
- data/lib/docsmith/diff/parsers/markdown.rb +60 -0
- data/lib/docsmith/diff/renderers/base.rb +62 -0
- data/lib/docsmith/diff/renderers/registry.rb +41 -0
- data/lib/docsmith/diff/renderers.rb +10 -0
- data/lib/docsmith/diff/result.rb +77 -0
- data/lib/docsmith/diff.rb +6 -0
- data/lib/docsmith/document.rb +44 -0
- data/lib/docsmith/document_version.rb +50 -0
- data/lib/docsmith/errors.rb +18 -0
- data/lib/docsmith/events/event.rb +19 -0
- data/lib/docsmith/events/hook_registry.rb +14 -0
- data/lib/docsmith/events/notifier.rb +22 -0
- data/lib/docsmith/rendering/html_renderer.rb +36 -0
- data/lib/docsmith/rendering/json_renderer.rb +29 -0
- data/lib/docsmith/version.rb +5 -0
- data/lib/docsmith/version_manager.rb +143 -0
- data/lib/docsmith/version_tag.rb +25 -0
- data/lib/docsmith/versionable.rb +252 -0
- data/lib/docsmith.rb +52 -0
- data/lib/generators/docsmith/install/install_generator.rb +27 -0
- data/lib/generators/docsmith/install/templates/create_docsmith_tables.rb.erb +64 -0
- data/lib/generators/docsmith/install/templates/docsmith_initializer.rb.erb +19 -0
- data/sig/docsmith.rbs +4 -0
- metadata +196 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
|
|
5
|
+
module Docsmith
|
|
6
|
+
module Comments
|
|
7
|
+
# Builds and migrates range anchors for inline comments.
#
# An anchor records the character offsets of a selection together with the
# selected text and its SHA256 digest, so the comment can be relocated when
# the content changes between versions.
module Anchor
  ACTIVE = "active"
  DRIFTED = "drifted"
  ORPHANED = "orphaned"

  # Builds anchor_data for a new range comment.
  #
  # @param content [String] the version content at comment time
  # @param start_offset [Integer] character offset of selection start (inclusive)
  # @param end_offset [Integer] character offset of selection end (exclusive)
  # @return [Hash] symbol-keyed anchor_data hash ready to store on the Comment
  def self.build(content, start_offset:, end_offset:)
    # Slicing outside the content yields nil; #to_s turns that into "".
    anchored_text = content[start_offset...end_offset].to_s
    {
      start_offset: start_offset,
      end_offset: end_offset,
      content_hash: Digest::SHA256.hexdigest(anchored_text),
      anchored_text: anchored_text,
      status: ACTIVE
    }
  end

  # Attempts to migrate an existing anchor to new version content.
  #
  # Strategy:
  #   1. Exact offsets: if the SHA256 of the text at the stored offsets
  #      matches the stored content_hash, the anchor is ACTIVE.
  #   2. Otherwise search the content for the first occurrence of the stored
  #      anchored_text; when found the anchor is DRIFTED and gets new offsets.
  #   3. Otherwise the anchor is ORPHANED (offsets are left as they were).
  #
  # Keys are normalized to strings first, so a hash taken straight from
  # .build (symbol keys) is handled the same way as one that round-tripped
  # through JSON storage (string keys). An anchor without anchored_text
  # cannot be searched for and is reported as ORPHANED.
  #
  # @param content [String] new version content
  # @param anchor_data [Hash] existing anchor_data (string or symbol keys)
  # @return [Hash] string-keyed copy of anchor_data with an updated "status"
  def self.migrate(content, anchor_data)
    data = anchor_data.to_h.transform_keys(&:to_s)
    start_off = data["start_offset"]
    end_off = data["end_offset"]
    original_text = data["anchored_text"]

    # 1. Exact offset check (skipped when either offset is missing)
    if start_off && end_off
      candidate = content[start_off...end_off].to_s
      return data.merge("status" => ACTIVE) if Digest::SHA256.hexdigest(candidate) == data["content_hash"]
    end

    # 2. Full-text search for relocated text
    idx = original_text && content.index(original_text)
    if idx
      return data.merge(
        "start_offset" => idx,
        "end_offset" => idx + original_text.length,
        "status" => DRIFTED
      )
    end

    # 3. Orphaned: the text no longer exists in the new content
    data.merge("status" => ORPHANED)
  end
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Docsmith
|
|
6
|
+
module Comments
|
|
7
|
+
# Represents a comment on a specific DocumentVersion.
|
|
8
|
+
# Supports document-level and range-anchored inline annotations,
|
|
9
|
+
# threaded replies via parent/replies, and resolution tracking.
|
|
10
|
+
class Comment < ActiveRecord::Base
|
|
11
|
+
self.table_name = "docsmith_comments"
|
|
12
|
+
|
|
13
|
+
belongs_to :version, class_name: "Docsmith::DocumentVersion", foreign_key: :version_id
|
|
14
|
+
belongs_to :author, polymorphic: true, optional: true
|
|
15
|
+
belongs_to :parent, class_name: "Docsmith::Comments::Comment", optional: true
|
|
16
|
+
belongs_to :resolved_by, polymorphic: true, optional: true
|
|
17
|
+
has_many :replies, class_name: "Docsmith::Comments::Comment",
|
|
18
|
+
foreign_key: :parent_id, dependent: :destroy
|
|
19
|
+
|
|
20
|
+
validates :body, presence: true
|
|
21
|
+
validates :anchor_type, inclusion: { in: %w[document range] }
|
|
22
|
+
|
|
23
|
+
scope :top_level, -> { where(parent_id: nil) }
|
|
24
|
+
scope :unresolved, -> { where(resolved: false) }
|
|
25
|
+
scope :document_level, -> { where(anchor_type: "document") }
|
|
26
|
+
scope :range_anchored, -> { where(anchor_type: "range") }
|
|
27
|
+
|
|
28
|
+
# Reads the anchor_data attribute as a Hash.
#
# A String value (JSON text) is parsed; any other value is converted with
# #to_h, so nil becomes an empty Hash. A blank string, or JSON that does not
# describe an object (for example "null"), also yields an empty Hash so that
# callers can always index the result. Malformed JSON still raises.
#
# @return [Hash]
# @raise [JSON::ParserError] if a non-blank String value is not valid JSON
def anchor_data
  raw = read_attribute(:anchor_data)
  return raw.to_h unless raw.is_a?(String)
  return {} if raw.strip.empty?

  parsed = JSON.parse(raw)
  parsed.is_a?(Hash) ? parsed : {}
end
|
|
35
|
+
|
|
36
|
+
# Serializes anchor_data as JSON for storage.
#
# A String is taken to be JSON already and is written unchanged. Anything
# else is serialized with #to_json. nil is written as an empty JSON object
# ("{}") rather than "null", so the stored value always describes a Hash.
#
# @param data [Hash, String, nil]
def anchor_data=(data)
  payload = data.is_a?(String) ? data : (data || {}).to_json
  write_attribute(:anchor_data, payload)
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Docsmith
|
|
4
|
+
module Comments
|
|
5
|
+
# Service object for creating and resolving comments on document versions.
|
|
6
|
+
class Manager
|
|
7
|
+
class << self
|
|
8
|
+
# Creates a comment on one version of a document and announces it.
#
# When +anchor+ is given the comment is stored as a "range" comment whose
# anchor_data comes from Anchor.build on the version's content; otherwise
# it is a "document" comment with empty anchor_data. After the record is
# created a :comment_added event is sent through Events::Notifier.
#
# @param document [Docsmith::Document]
# @param version_number [Integer]
# @param body [String]
# @param author [Object] polymorphic author record
# @param anchor [Hash, nil] { start_offset:, end_offset: } for inline range comments
# @param parent [Comments::Comment, nil] parent comment for threading
# @return [Comments::Comment]
# @raise [ActiveRecord::RecordNotFound] if version_number does not exist
def add!(document, version_number:, body:, author:, anchor: nil, parent: nil)
  target = Docsmith::DocumentVersion.find_by!(document: document, version_number: version_number)

  kind, payload =
    if anchor
      built = Anchor.build(target.content.to_s,
                           start_offset: anchor[:start_offset],
                           end_offset: anchor[:end_offset])
      ["range", built]
    else
      ["document", {}]
    end

  created = Comment.create!(
    version: target,
    author: author,
    body: body,
    anchor_type: kind,
    anchor_data: payload,
    parent: parent,
    resolved: false
  )

  Events::Notifier.instrument(
    :comment_added,
    record: document.subject || document,
    document: document,
    version: target,
    author: author,
    comment: created
  )

  created
end
|
|
50
|
+
|
|
51
|
+
# Flags a comment as resolved and announces it.
#
# Updates resolved, resolved_by and resolved_at on the comment, then sends
# a :comment_resolved event through Events::Notifier with the resolver
# passed as the event author.
#
# @param comment [Comments::Comment]
# @param by [Object] polymorphic resolver record
# @return [Comments::Comment] the same comment that was passed in
def resolve!(comment, by:)
  comment.update!(resolved: true, resolved_by: by, resolved_at: Time.current)

  owner = comment.version.document
  payload = {
    record: owner.subject || owner,
    document: owner,
    version: comment.version,
    author: by,
    comment: comment
  }
  Events::Notifier.instrument(:comment_resolved, **payload)

  comment
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Docsmith
|
|
4
|
+
module Comments
|
|
5
|
+
# Migrates top-level comments from one version to another.
|
|
6
|
+
# Document-level comments are copied as-is.
|
|
7
|
+
# Range-anchored comments are re-anchored using Anchor.migrate;
|
|
8
|
+
# orphaned comments fire the :comment_orphaned event.
|
|
9
|
+
class Migrator
|
|
10
|
+
class << self
|
|
11
|
+
# Copies every top-level comment of one version onto another version.
#
# Each copy keeps the author, body, anchor type and resolution fields of
# the source comment; its anchor_data comes from migrate_anchor. When a
# range comment ends up orphaned, a :comment_orphaned event is sent for
# the newly created copy.
#
# @param document [Docsmith::Document]
# @param from [Integer] source version_number
# @param to [Integer] target version_number
# @return [void]
def migrate!(document, from:, to:)
  source = Docsmith::DocumentVersion.find_by!(document: document, version_number: from)
  target = Docsmith::DocumentVersion.find_by!(document: document, version_number: to)
  target_content = target.content.to_s

  source.comments.top_level.each do |original|
    anchor = migrate_anchor(original, target_content)

    copy = Comment.create!(
      version: target,
      author_type: original.author_type,
      author_id: original.author_id,
      body: original.body,
      anchor_type: original.anchor_type,
      anchor_data: anchor,
      resolved: original.resolved,
      resolved_by_type: original.resolved_by_type,
      resolved_by_id: original.resolved_by_id,
      resolved_at: original.resolved_at
    )

    next unless orphaned?(original, anchor)

    Events::Notifier.instrument(
      :comment_orphaned,
      record: document.subject || document,
      document: document,
      version: target,
      author: nil,
      comment: copy
    )
  end
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
# Picks the anchor_data for the migrated copy of a comment: document-level
# comments keep theirs unchanged, all others are re-anchored against the
# new content with Anchor.migrate.
#
# @param comment [Comments::Comment] source comment
# @param new_content [String] content of the target version
# @return [Hash] migrated anchor_data
def migrate_anchor(comment, new_content)
  if comment.anchor_type == "document"
    comment.anchor_data
  else
    Anchor.migrate(new_content, comment.anchor_data)
  end
end
|
|
56
|
+
|
|
57
|
+
# Tells whether a migrated range comment lost its anchor. Only "range"
# comments can be orphaned; for those the migrated "status" is compared
# with Anchor::ORPHANED.
#
# @param comment [Comments::Comment] source comment
# @param new_anchor_data [Hash] string-keyed anchor_data after migration
# @return [Boolean]
def orphaned?(comment, new_anchor_data)
  return false unless comment.anchor_type == "range"

  new_anchor_data["status"] == Anchor::ORPHANED
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Docsmith
|
|
4
|
+
# Collector behind per-class docsmith_config blocks.
# Every name in KEYS becomes a one-argument method that records its
# argument in #settings under that name. Nothing is merged with the global
# configuration here; Configuration.resolve does that when settings are read.
class ClassConfig
  KEYS = %i[content_field content_type auto_save debounce max_versions content_extractor].freeze

  # @return [Hash] raw settings set in this block
  attr_reader :settings

  def initialize
    @settings = {}
  end

  # One setter-style DSL method per key; each returns the value it stored.
  KEYS.each do |setting|
    define_method(setting) do |value|
      @settings.store(setting, value)
    end
  end
end
|
|
21
|
+
|
|
22
|
+
# Global configuration object. Set via Docsmith.configure { |c| ... }.
class Configuration
  # Gem-level defaults: the final fallback in resolution order.
  # debounce is an Integer number of seconds here; whatever value wins in
  # .resolve is normalized with #to_i.
  DEFAULTS = {
    content_field: :body,
    content_type: :markdown,
    auto_save: true,
    debounce: 30,
    max_versions: nil,
    content_extractor: nil
  }.freeze

  # Maps ClassConfig keys to their global Configuration attribute names.
  GLOBAL_KEY_MAP = {
    content_field: :default_content_field,
    content_type: :default_content_type,
    auto_save: :auto_save,
    debounce: :default_debounce,
    max_versions: :max_versions,
    content_extractor: :content_extractor
  }.freeze

  attr_accessor :default_content_field, :default_content_type, :auto_save,
                :default_debounce, :max_versions, :content_extractor,
                :table_prefix, :diff_context_lines

  def initialize
    @default_content_field = DEFAULTS[:content_field]
    @default_content_type = DEFAULTS[:content_type]
    @auto_save = DEFAULTS[:auto_save]
    @default_debounce = DEFAULTS[:debounce]
    @max_versions = DEFAULTS[:max_versions]
    @content_extractor = DEFAULTS[:content_extractor]
    @table_prefix = "docsmith"
    @diff_context_lines = 3
    # Each event name gets its own array the first time it is looked up.
    @hooks = Hash.new { |h, k| h[k] = [] }
  end

  # Register a synchronous callback for a named event.
  #
  # The name is normalized with #to_sym, so "version_created" and
  # :version_created share one hook list.
  #
  # @param event_name [Symbol, String] e.g. :version_created
  # @yield [Docsmith::Events::Event]
  # @return [Array<Proc>] the hooks now registered for the event
  # @raise [ArgumentError] if no block is given (nil would otherwise be stored as a hook)
  def on(event_name, &block)
    raise ArgumentError, "a block is required to register a hook for #{event_name.inspect}" unless block

    @hooks[event_name.to_sym] << block
  end

  # @param event_name [Symbol, String]
  # @return [Array<Proc>] hooks registered for the event (empty if none)
  def hooks_for(event_name)
    @hooks[event_name.to_sym]
  end

  # Merge per-class settings over global config over gem defaults.
  #
  # For every key in DEFAULTS the value is taken from class_settings when
  # that key is present (even if its value is nil), otherwise from the
  # global config when its value is not nil, otherwise from DEFAULTS.
  # Resolution happens at call time, so global changes made after a class
  # was defined still apply to keys the class does not override.
  #
  # @param class_settings [Hash, nil] symbol-keyed overrides; nil means none
  # @param global_config [Docsmith::Configuration, nil]
  # @return [Hash] fully resolved config with :debounce as an Integer
  def self.resolve(class_settings, global_config)
    overrides = class_settings || {}

    resolved = DEFAULTS.each_with_object({}) do |(key, default_val), result|
      global_val = global_config&.public_send(GLOBAL_KEY_MAP.fetch(key))

      result[key] = if overrides.key?(key)
                      overrides[key]
                    elsif !global_val.nil? # false is a meaningful global value
                      global_val
                    else
                      default_val
                    end
    end

    resolved[:debounce] = resolved[:debounce].to_i
    resolved
  end
end
|
|
95
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Docsmith
|
|
4
|
+
module Diff
  # Produces a Diff::Result for two DocumentVersion records.
  #
  # The differ is chosen from PARSERS by the first version's content type
  # ("markdown" or "html"); every other content type is handled by
  # Renderers::Base.
  class Engine
    PARSERS = {
      "markdown" => Parsers::Markdown,
      "html" => Parsers::Html
    }.freeze

    # @param version_a [Docsmith::DocumentVersion] the older version
    # @param version_b [Docsmith::DocumentVersion] the newer version
    # @return [Docsmith::Diff::Result]
    def self.between(version_a, version_b)
      type = version_a.content_type.to_s
      differ = PARSERS.fetch(type, Renderers::Base).new
      delta = differ.compute(version_a.content.to_s, version_b.content.to_s)

      Result.new(
        content_type: type,
        from_version: version_a.version_number,
        to_version: version_b.version_number,
        changes: delta
      )
    end
  end

  class << self
    # Shortcut for Engine.between: Docsmith::Diff.between(v1, v2)
    #
    # @param version_a [Docsmith::DocumentVersion] the older version
    # @param version_b [Docsmith::DocumentVersion] the newer version
    # @return [Docsmith::Diff::Result]
    def between(version_a, version_b)
      Engine.between(version_a, version_b)
    end
  end
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "diff/lcs"
|
|
4
|
+
|
|
5
|
+
module Docsmith
|
|
6
|
+
module Diff
|
|
7
|
+
module Parsers
|
|
8
|
+
# HTML-aware diff parser for HTML documents.
|
|
9
|
+
#
|
|
10
|
+
# Tokenizes HTML so that each tag (including its attributes) is one atomic
|
|
11
|
+
# unit and text words are separate units. This prevents the diff engine from
|
|
12
|
+
# splitting `<p class="foo">` into angle brackets, attribute names, and values.
|
|
13
|
+
#
|
|
14
|
+
# Tokenization regex: /<[^>]+>|[^\s<>]+/
|
|
15
|
+
# - /<[^>]+>/ matches any HTML tag: <p>, </p>, <div class="x">, <br/>
|
|
16
|
+
# - /[^\s<>]+/ matches words in text content between tags
|
|
17
|
+
#
|
|
18
|
+
# Example: "<p>Hello world</p>" → ["<p>", "Hello", "world", "</p>"]
|
|
19
|
+
#
|
|
20
|
+
# The :line key in change hashes stores the 1-indexed token position
|
|
21
|
+
# (not a line number) for compatibility with Diff::Result serialization.
|
|
22
|
+
class Html < Renderers::Base
|
|
23
|
+
# Token pattern; the alternatives are tried in this order at each position:
#   <[^>]+>    a complete tag, kept atomic together with its attributes
#   [^\s<>]+   a run of text up to the next whitespace or angle bracket
#   [<>]       a stray angle bracket that is not part of a complete tag
# The last alternative keeps an unescaped "<" or ">" in text (as in "1 < 2")
# as its own token. Without it such characters match nothing and are dropped,
# so a change from "1 < 2" to "1 > 2" would not show up in the diff.
TAG_OR_WORD = /<[^>]+>|[^\s<>]+|[<>]/.freeze
|
|
24
|
+
|
|
25
|
+
# Diffs two HTML strings token by token.
#
# Both inputs are tokenized, aligned with Diff::LCS.sdiff, and every
# hunk whose action is "+", "-" or "!" becomes one change hash. All other
# hunks are left out. :line holds the 1-indexed token position: the new
# position for additions, the old position otherwise.
#
# @param old_content [String]
# @param new_content [String]
# @return [Array<Hash>] change hashes with :type, :line (token index), and content keys
def compute(old_content, new_content)
  before = tokenize(old_content)
  after = tokenize(new_content)

  mapped = ::Diff::LCS.sdiff(before, after).map do |hunk|
    case hunk.action
    when "+"
      { type: :addition, line: hunk.new_position + 1, content: hunk.new_element.to_s }
    when "-"
      { type: :deletion, line: hunk.old_position + 1, content: hunk.old_element.to_s }
    when "!"
      {
        type: :modification,
        line: hunk.old_position + 1,
        old_content: hunk.old_element.to_s,
        new_content: hunk.new_element.to_s
      }
    end
  end

  # Hunks with any other action mapped to nil above.
  mapped.compact
end
|
|
51
|
+
|
|
52
|
+
private
|
|
53
|
+
|
|
54
|
+
# Breaks HTML into the tokens that get diffed.
#
# Returns every match of TAG_OR_WORD in order of appearance; anything
# between matches (such as whitespace) is not part of the result.
#
# @param content [String]
# @return [Array<String>]
def tokenize(content)
  content.scan(TAG_OR_WORD)
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "diff/lcs"
|
|
4
|
+
|
|
5
|
+
module Docsmith
|
|
6
|
+
module Diff
|
|
7
|
+
module Parsers
|
|
8
|
+
# Word-level diff parser for Markdown documents.
|
|
9
|
+
#
|
|
10
|
+
# Instead of comparing line-by-line (as Renderers::Base does), this parser
|
|
11
|
+
# tokenizes content into individual words and newline groups, then diffs
|
|
12
|
+
# those tokens. This gives precise word-level change detection for prose,
|
|
13
|
+
# which is far more useful than "the whole line changed."
|
|
14
|
+
#
|
|
15
|
+
# Tokenization: content.scan(/\S+|\n+/)
|
|
16
|
+
# "Hello world\n\nFoo" → ["Hello", "world", "\n\n", "Foo"]
|
|
17
|
+
#
|
|
18
|
+
# The :line key in change hashes stores the 1-indexed token position
|
|
19
|
+
# (not a line number) for compatibility with Diff::Result serialization.
|
|
20
|
+
class Markdown < Renderers::Base
|
|
21
|
+
# Diffs two Markdown strings token by token.
#
# Both inputs are tokenized, aligned with Diff::LCS.sdiff, and every
# hunk whose action is "+", "-" or "!" becomes one change hash. All other
# hunks are left out. :line holds the 1-indexed token position: the new
# position for additions, the old position otherwise.
#
# @param old_content [String]
# @param new_content [String]
# @return [Array<Hash>] change hashes with :type, :line (token index), and content keys
def compute(old_content, new_content)
  before = tokenize(old_content)
  after = tokenize(new_content)

  mapped = ::Diff::LCS.sdiff(before, after).map do |hunk|
    case hunk.action
    when "+"
      { type: :addition, line: hunk.new_position + 1, content: hunk.new_element.to_s }
    when "-"
      { type: :deletion, line: hunk.old_position + 1, content: hunk.old_element.to_s }
    when "!"
      {
        type: :modification,
        line: hunk.old_position + 1,
        old_content: hunk.old_element.to_s,
        new_content: hunk.new_element.to_s
      }
    end
  end

  # Hunks with any other action mapped to nil above.
  mapped.compact
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
# Breaks Markdown into the tokens that get diffed.
#
# A token is either a run of non-whitespace characters (\S+) or a run of
# one or more newlines (\n+), so a line break and a paragraph break are
# each a single token. Other whitespace is not part of the result.
#
# @param content [String]
# @return [Array<String>]
def tokenize(content)
  word_or_newlines = /\S+|\n+/
  content.scan(word_or_newlines)
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "diff/lcs"
|
|
4
|
+
require "cgi"
|
|
5
|
+
|
|
6
|
+
module Docsmith
|
|
7
|
+
module Diff
|
|
8
|
+
module Renderers
|
|
9
|
+
# Line-level diff renderer using diff-lcs.
#
# #compute turns two strings into an array of change hashes and
# #render_html turns such an array into markup. Content-type-specific
# renderers can be registered via Renderers::Registry when needed.
class Base
  # Computes line-level changes between two content strings.
  #
  # Lines are split on "\n" with trailing empty strings kept, so a
  # trailing newline is visible to the diff. :line is the 1-indexed line
  # number: the new position for additions, the old position otherwise.
  #
  # @param old_content [String]
  # @param new_content [String]
  # @return [Array<Hash>] change hashes with :type, :line, and content fields
  def compute(old_content, new_content)
    old_lines = old_content.split("\n", -1)
    new_lines = new_content.split("\n", -1)

    ::Diff::LCS.sdiff(old_lines, new_lines).map { |hunk| change_for(hunk) }.compact
  end

  # Renders a change array as an HTML diff representation.
  #
  # Additions become <ins>, deletions <del>, and modifications a <del>
  # immediately followed by an <ins>. All content is HTML-escaped. Changes
  # with an unrecognised :type are skipped, and a missing content field is
  # rendered as empty text.
  #
  # @param changes [Array<Hash>]
  # @return [String] HTML string
  def render_html(changes)
    lines = changes.map { |change| render_change(change) }.compact
    %(<div class="docsmith-diff">#{lines.join("\n")}</div>)
  end

  private

  # Maps one sdiff hunk to a change hash; nil for any other action.
  def change_for(hunk)
    case hunk.action
    when "+"
      { type: :addition, line: hunk.new_position + 1, content: hunk.new_element.to_s }
    when "-"
      { type: :deletion, line: hunk.old_position + 1, content: hunk.old_element.to_s }
    when "!"
      {
        type: :modification,
        line: hunk.old_position + 1,
        old_content: hunk.old_element.to_s,
        new_content: hunk.new_element.to_s
      }
    end
  end

  # Markup for a single change; nil when the change type is not recognised.
  def render_change(change)
    case change[:type]
    when :addition
      %(<ins class="docsmith-addition">#{escape(change[:content])}</ins>)
    when :deletion
      %(<del class="docsmith-deletion">#{escape(change[:content])}</del>)
    when :modification
      %(<del class="docsmith-deletion">#{escape(change[:old_content])}</del>) +
        %(<ins class="docsmith-addition">#{escape(change[:new_content])}</ins>)
    end
  end

  # HTML-escapes text; #to_s keeps a nil field from raising in CGI.escapeHTML.
  def escape(text)
    CGI.escapeHTML(text.to_s)
  end
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Docsmith
|
|
4
|
+
module Diff
|
|
5
|
+
module Renderers
|
|
6
|
+
# Registry for diff renderers keyed by content type string.
|
|
7
|
+
# Falls back to Base for unregistered types.
|
|
8
|
+
# Use Docsmith::Diff::Renderers::Registry.register(:html, MyRenderer)
|
|
9
|
+
# to add custom renderers at runtime.
|
|
10
|
+
class Registry
  # Registered renderer classes keyed by content type string.
  @renderers = {}

  # Stores a renderer class under the string form of the content type.
  #
  # @param content_type [String, Symbol]
  # @param renderer_class [Class]
  # @return [void]
  def self.register(content_type, renderer_class)
    @renderers.store(content_type.to_s, renderer_class)
  end

  # Looks up the renderer class for a content type.
  #
  # @param content_type [String, Symbol]
  # @return [Class] renderer class; defaults to Base for unregistered types
  def self.for(content_type)
    key = content_type.to_s
    @renderers.fetch(key, Base)
  end

  # @return [Hash] copy of registered renderers
  def self.all
    @renderers.dup
  end

  # Resets registry to empty (for test isolation only).
  # @return [void]
  def self.reset!
    @renderers = {}
  end
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|