coradoc-docx 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. checksums.yaml +7 -0
  2. data/README.adoc +164 -0
  3. data/lib/coradoc/docx/transform/context.rb +72 -0
  4. data/lib/coradoc/docx/transform/from_core_model.rb +577 -0
  5. data/lib/coradoc/docx/transform/numbering_resolver.rb +127 -0
  6. data/lib/coradoc/docx/transform/ordered_content.rb +95 -0
  7. data/lib/coradoc/docx/transform/rule.rb +57 -0
  8. data/lib/coradoc/docx/transform/rule_registry.rb +60 -0
  9. data/lib/coradoc/docx/transform/rules/bookmark_rule.rb +34 -0
  10. data/lib/coradoc/docx/transform/rules/break_rule.rb +30 -0
  11. data/lib/coradoc/docx/transform/rules/footnote_rule.rb +27 -0
  12. data/lib/coradoc/docx/transform/rules/heading_rule.rb +53 -0
  13. data/lib/coradoc/docx/transform/rules/hyperlink_rule.rb +58 -0
  14. data/lib/coradoc/docx/transform/rules/image_rule.rb +125 -0
  15. data/lib/coradoc/docx/transform/rules/list_item_rule.rb +47 -0
  16. data/lib/coradoc/docx/transform/rules/math_rule.rb +82 -0
  17. data/lib/coradoc/docx/transform/rules/paragraph_rule.rb +65 -0
  18. data/lib/coradoc/docx/transform/rules/proof_error_rule.rb +25 -0
  19. data/lib/coradoc/docx/transform/rules/run_rule.rb +189 -0
  20. data/lib/coradoc/docx/transform/rules/simple_field_rule.rb +87 -0
  21. data/lib/coradoc/docx/transform/rules/structured_document_tag_rule.rb +36 -0
  22. data/lib/coradoc/docx/transform/rules/table_rule.rb +85 -0
  23. data/lib/coradoc/docx/transform/rules/text_rule.rb +25 -0
  24. data/lib/coradoc/docx/transform/style_resolver.rb +249 -0
  25. data/lib/coradoc/docx/transform/to_core_model.rb +340 -0
  26. data/lib/coradoc/docx/transform.rb +38 -0
  27. data/lib/coradoc/docx/version.rb +7 -0
  28. data/lib/coradoc/docx.rb +99 -0
  29. metadata +155 -0
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'coradoc'
4
+ require 'coradoc/core_model'
5
+ require 'uniword'
6
+
7
# Coradoc::Docx provides DOCX (OOXML) format support for Coradoc.
#
# Reading: Uniword::Wordprocessingml model trees are transformed into
# Coradoc::CoreModel, enabling DOCX → AsciiDoc and DOCX → Markdown
# conversion via the hub-and-spoke architecture.
#
# Writing: a CoreModel document is transformed back into a Uniword document
# and, optionally, saved as a .docx file or written to an IO stream.
#
# @example Convert DOCX to AsciiDoc
#   Coradoc.convert("input.docx", from: :docx, to: :asciidoc)
#
# @example Parse DOCX to CoreModel
#   core = Coradoc::Docx.parse_to_core("input.docx")
#
# @example Serialize CoreModel to a DOCX file
#   Coradoc::Docx.serialize(core, output_path: "output.docx")
#
module Coradoc
  module Docx
    autoload :VERSION, 'coradoc/docx/version'
    autoload :Transform, 'coradoc/docx/transform'

    class << self
      # Parse a DOCX input to CoreModel
      #
      # @param input [String, IO, StringIO, Uniword::Wordprocessingml::DocumentRoot]
      #   Path to .docx file, IO stream, or pre-parsed Uniword document
      # @param _options [Hash] additional options (reserved, currently ignored)
      # @return [Coradoc::CoreModel::StructuralElement] CoreModel document
      # @raise [ArgumentError] if the path does not exist or the input type
      #   is not supported
      def parse_to_core(input, _options = {})
        document = coerce_to_document(input)
        Transform::ToCoreModel.transform(document)
      end

      # Parse a DOCX input to Uniword model (no CoreModel conversion)
      #
      # @param input [String, IO, StringIO, Uniword::Wordprocessingml::DocumentRoot]
      #   Path to .docx file, IO stream, or pre-parsed Uniword document
      #   (a pre-parsed document is returned unchanged)
      # @param _options [Hash] additional options (reserved, currently ignored)
      # @return [Uniword::Wordprocessingml::DocumentRoot] Uniword document model
      # @raise [ArgumentError] if the path does not exist or the input type
      #   is not supported
      def parse(input, _options = {})
        coerce_to_document(input)
      end

      # Whether this format supports serialization
      #
      # @return [Boolean] always true
      def serialize?
        true
      end

      # Serialize CoreModel to DOCX
      #
      # @param core_model [Coradoc::CoreModel::Base] CoreModel document
      # @param options [Hash] serialization options
      # @option options [String] :output_path Path to write .docx file
      #   (takes precedence over :to_io)
      # @option options [IO, StringIO] :to_io Stream to receive the document.
      #   If the stream exposes a filesystem path the document is saved to
      #   that path; otherwise the DOCX bytes are written into the stream.
      # @return [String, IO, StringIO, Uniword::Wordprocessingml::DocumentRoot]
      #   The output path if :output_path is given, the stream if :to_io is
      #   given, otherwise the DocumentRoot
      def serialize(core_model, **options)
        document = Transform::FromCoreModel.transform(core_model)

        if (path = options[:output_path])
          document.save(path)
          path
        elsif (io = options[:to_io])
          write_to_io(document, io)
          io
        else
          document
        end
      end

      private

      # Write +document+ to +io+.
      #
      # The document is only known here to support saving to a filesystem
      # path, so path-less streams (e.g. StringIO) are served by saving to a
      # temporary file and copying its bytes into the stream.
      #
      # @param document [#save] document responding to +save(path)+
      # @param io [IO, StringIO] destination stream
      # @return [void]
      def write_to_io(document, io)
        path = io.path if io.respond_to?(:path)
        if path
          document.save(path)
          return
        end

        # Loaded lazily: only needed for streams without a backing path.
        require 'tempfile'
        Tempfile.create(['coradoc-docx', '.docx']) do |tmp|
          # Close our handle so the document writer can reopen the path.
          tmp.close
          document.save(tmp.path)
          # DOCX is a binary format; avoid any transcoding on write.
          io.binmode if io.respond_to?(:binmode)
          io.write(File.binread(tmp.path))
        end
      end

      # Turn any supported input into a Uniword document.
      #
      # @param input [String, IO, StringIO, Uniword::Wordprocessingml::DocumentRoot]
      # @return [Uniword::Wordprocessingml::DocumentRoot]
      # @raise [ArgumentError] if a String path does not exist, or the input
      #   is none of the supported types
      def coerce_to_document(input)
        case input
        when Uniword::Wordprocessingml::DocumentRoot
          input
        when String
          raise ArgumentError, "File not found: #{input}" unless File.exist?(input)

          Uniword::DocumentFactory.from_file(input)
        when IO, StringIO
          Uniword::DocumentFactory.from_io(input)
        else
          raise ArgumentError,
                "Expected file path, IO, or DocumentRoot, got #{input.class}"
        end
      end
    end
  end
end
92
+
93
# Register the :docx format with Coradoc when this file is loaded, unless a
# format is already registered under that name.
unless Coradoc.registered_formats.include?(:docx)
  Coradoc.register_format(
    :docx,
    Coradoc::Docx,
    aliases: %w[docx],
    extensions: %w[.docx],
    binary: true
  )
end
metadata ADDED
@@ -0,0 +1,155 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: coradoc-docx
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ribose Inc.
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: coradoc
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '2.0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '2.0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: lutaml-model
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: 0.8.0
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 0.8.0
40
+ - !ruby/object:Gem::Dependency
41
+ name: uniword
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: rake
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: rspec
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ type: :development
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ - !ruby/object:Gem::Dependency
83
+ name: rubocop
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ description: Provides OOXML (DOCX) to CoreModel transformation for the Coradoc document
97
+ transformation hub. Uses Uniword to read DOCX files and transforms the OOXML model
98
+ tree to Coradoc::CoreModel.
99
+ email:
100
+ - open.source@ribose.com
101
+ executables: []
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - README.adoc
106
+ - lib/coradoc/docx.rb
107
+ - lib/coradoc/docx/transform.rb
108
+ - lib/coradoc/docx/transform/context.rb
109
+ - lib/coradoc/docx/transform/from_core_model.rb
110
+ - lib/coradoc/docx/transform/numbering_resolver.rb
111
+ - lib/coradoc/docx/transform/ordered_content.rb
112
+ - lib/coradoc/docx/transform/rule.rb
113
+ - lib/coradoc/docx/transform/rule_registry.rb
114
+ - lib/coradoc/docx/transform/rules/bookmark_rule.rb
115
+ - lib/coradoc/docx/transform/rules/break_rule.rb
116
+ - lib/coradoc/docx/transform/rules/footnote_rule.rb
117
+ - lib/coradoc/docx/transform/rules/heading_rule.rb
118
+ - lib/coradoc/docx/transform/rules/hyperlink_rule.rb
119
+ - lib/coradoc/docx/transform/rules/image_rule.rb
120
+ - lib/coradoc/docx/transform/rules/list_item_rule.rb
121
+ - lib/coradoc/docx/transform/rules/math_rule.rb
122
+ - lib/coradoc/docx/transform/rules/paragraph_rule.rb
123
+ - lib/coradoc/docx/transform/rules/proof_error_rule.rb
124
+ - lib/coradoc/docx/transform/rules/run_rule.rb
125
+ - lib/coradoc/docx/transform/rules/simple_field_rule.rb
126
+ - lib/coradoc/docx/transform/rules/structured_document_tag_rule.rb
127
+ - lib/coradoc/docx/transform/rules/table_rule.rb
128
+ - lib/coradoc/docx/transform/rules/text_rule.rb
129
+ - lib/coradoc/docx/transform/style_resolver.rb
130
+ - lib/coradoc/docx/transform/to_core_model.rb
131
+ - lib/coradoc/docx/version.rb
132
+ homepage: https://github.com/lutaml/coradoc
133
+ licenses:
134
+ - MIT
135
+ metadata:
136
+ homepage_uri: https://github.com/lutaml/coradoc
137
+ source_code_uri: https://github.com/lutaml/coradoc
138
+ rdoc_options: []
139
+ require_paths:
140
+ - lib
141
+ required_ruby_version: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: 3.1.0
146
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ requirements: []
152
+ rubygems_version: 3.6.9
153
+ specification_version: 4
154
+ summary: DOCX (OOXML) format support for Coradoc
155
+ test_files: []