coradoc-docx 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.adoc +164 -0
- data/lib/coradoc/docx/transform/context.rb +72 -0
- data/lib/coradoc/docx/transform/from_core_model.rb +577 -0
- data/lib/coradoc/docx/transform/numbering_resolver.rb +127 -0
- data/lib/coradoc/docx/transform/ordered_content.rb +95 -0
- data/lib/coradoc/docx/transform/rule.rb +57 -0
- data/lib/coradoc/docx/transform/rule_registry.rb +60 -0
- data/lib/coradoc/docx/transform/rules/bookmark_rule.rb +34 -0
- data/lib/coradoc/docx/transform/rules/break_rule.rb +30 -0
- data/lib/coradoc/docx/transform/rules/footnote_rule.rb +27 -0
- data/lib/coradoc/docx/transform/rules/heading_rule.rb +53 -0
- data/lib/coradoc/docx/transform/rules/hyperlink_rule.rb +58 -0
- data/lib/coradoc/docx/transform/rules/image_rule.rb +125 -0
- data/lib/coradoc/docx/transform/rules/list_item_rule.rb +47 -0
- data/lib/coradoc/docx/transform/rules/math_rule.rb +82 -0
- data/lib/coradoc/docx/transform/rules/paragraph_rule.rb +65 -0
- data/lib/coradoc/docx/transform/rules/proof_error_rule.rb +25 -0
- data/lib/coradoc/docx/transform/rules/run_rule.rb +189 -0
- data/lib/coradoc/docx/transform/rules/simple_field_rule.rb +87 -0
- data/lib/coradoc/docx/transform/rules/structured_document_tag_rule.rb +36 -0
- data/lib/coradoc/docx/transform/rules/table_rule.rb +85 -0
- data/lib/coradoc/docx/transform/rules/text_rule.rb +25 -0
- data/lib/coradoc/docx/transform/style_resolver.rb +249 -0
- data/lib/coradoc/docx/transform/to_core_model.rb +340 -0
- data/lib/coradoc/docx/transform.rb +38 -0
- data/lib/coradoc/docx/version.rb +7 -0
- data/lib/coradoc/docx.rb +99 -0
- metadata +155 -0
data/lib/coradoc/docx.rb
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'coradoc'
|
|
4
|
+
require 'coradoc/core_model'
|
|
5
|
+
require 'uniword'
|
|
6
|
+
|
|
7
|
+
# Coradoc::Docx provides DOCX (OOXML) format support for Coradoc.
#
# Reading: transforms Uniword::Wordprocessingml model trees into
# Coradoc::CoreModel, enabling DOCX → AsciiDoc and DOCX → Markdown
# conversion via the hub-and-spoke architecture.
#
# Writing: transforms a CoreModel tree into a Uniword document through
# Transform::FromCoreModel and optionally saves it (see .serialize).
#
# @example Convert DOCX to AsciiDoc
#   Coradoc.convert("input.docx", from: :docx, to: :asciidoc)
#
# @example Parse DOCX to CoreModel
#   core = Coradoc::Docx.parse_to_core("input.docx")
#
# @example Serialize CoreModel to a .docx file
#   Coradoc::Docx.serialize(core, output_path: "output.docx")
#
module Coradoc
  module Docx
    autoload :VERSION, 'coradoc/docx/version'
    autoload :Transform, 'coradoc/docx/transform'

    class << self
      # Parse a DOCX input to CoreModel
      #
      # @param input [String, #to_path, #read, Uniword::Wordprocessingml::DocumentRoot]
      #   Path to .docx file, IO-like stream, or pre-parsed Uniword document
      # @param _options [Hash] additional options (reserved, currently ignored)
      # @return [Coradoc::CoreModel::StructuralElement] CoreModel document
      # @raise [ArgumentError] if the path does not exist or the input type
      #   is not supported
      def parse_to_core(input, _options = {})
        Transform::ToCoreModel.transform(coerce_to_document(input))
      end

      # Parse a DOCX input to Uniword model (no CoreModel conversion)
      #
      # @param input [String, #to_path, #read, Uniword::Wordprocessingml::DocumentRoot]
      #   Path to .docx file, IO-like stream, or pre-parsed Uniword document
      #   (a pre-parsed document is returned unchanged)
      # @param _options [Hash] additional options (reserved, currently ignored)
      # @return [Uniword::Wordprocessingml::DocumentRoot] Uniword document model
      # @raise [ArgumentError] if the path does not exist or the input type
      #   is not supported
      def parse(input, _options = {})
        coerce_to_document(input)
      end

      # Whether this format supports serialization
      #
      # @return [Boolean] always true
      def serialize?
        true
      end

      # Serialize CoreModel to DOCX
      #
      # @param core_model [Coradoc::CoreModel::Base] CoreModel document
      # @param options [Hash] serialization options
      # @option options [String] :output_path Path to write .docx file
      #   (takes precedence over :to_io)
      # @option options [#write] :to_io Stream to receive the document. When
      #   the stream exposes a non-nil #path the document is saved to that
      #   path; otherwise the DOCX bytes are written to the stream itself.
      # @return [String, Object, Uniword::Wordprocessingml::DocumentRoot]
      #   Returns the output path if :output_path given, the stream if
      #   :to_io given, otherwise the DocumentRoot
      def serialize(core_model, **options)
        document = Transform::FromCoreModel.transform(core_model)
        output_path = options[:output_path]
        target_io = options[:to_io]

        if output_path
          document.save(output_path)
          output_path
        elsif target_io
          write_to_io(document, target_io)
        else
          document
        end
      end

      private

      # Turn any supported input into a Uniword document.
      #
      # Streams are detected by duck typing (see #stream?) rather than by
      # `IO`/`StringIO` class checks, so this file does not depend on
      # `stringio` having been loaded and delegators such as Tempfile work.
      def coerce_to_document(input)
        case input
        when Uniword::Wordprocessingml::DocumentRoot then input
        when String then load_from_path(input)
        else
          if stream?(input)
            Uniword::DocumentFactory.from_io(input)
          elsif input.respond_to?(:to_path)
            # Path-like objects that are not streams, e.g. Pathname.
            load_from_path(input.to_path)
          else
            raise ArgumentError,
                  "Expected file path, IO, or DocumentRoot, got #{input.class}"
          end
        end
      end

      # Load a document from a filesystem path, failing early with a clear
      # message when nothing exists at that path.
      def load_from_path(path)
        raise ArgumentError, "File not found: #{path}" unless File.exist?(path)

        Uniword::DocumentFactory.from_file(path)
      end

      # True for readable stream objects (IO, File, StringIO, Tempfile, ...).
      # Requiring #eof? as well as #read keeps Pathname and Dir, which also
      # respond to #read, out of the stream branch.
      def stream?(input)
        input.respond_to?(:read) && input.respond_to?(:eof?)
      end

      # Deliver +document+ to +io+ and return +io+.
      #
      # Path-backed streams are saved via their path, as before. Streams
      # without a path (e.g. StringIO) receive the bytes through a temporary
      # .docx file, because only `document.save(path)` is known to be
      # available here.
      # NOTE(review): assumes `document.save` writes the complete package to
      # the given path — confirm against Uniword.
      def write_to_io(document, io)
        path = io.path if io.respond_to?(:path)

        if path
          document.save(path)
        else
          require 'tempfile' # lazy: only needed for path-less streams
          Tempfile.create(['coradoc-docx', '.docx']) do |tmp|
            tmp.close
            document.save(tmp.path)
            io.binmode if io.respond_to?(:binmode) # DOCX is binary data
            io.write(File.binread(tmp.path))
          end
        end

        io
      end
    end
  end
end
|
|
92
|
+
|
|
93
|
+
# Register the :docx format with Coradoc at load time, unless a handler for
# :docx is already present in Coradoc's format registry.
docx_already_registered = Coradoc.registered_formats.include?(:docx)

unless docx_already_registered
  Coradoc.register_format(
    :docx,
    Coradoc::Docx,
    aliases: %w[docx],
    extensions: %w[.docx],
    binary: true
  )
end
|
metadata
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: coradoc-docx
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Ribose Inc.
|
|
8
|
+
bindir: exe
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: coradoc
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '2.0'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '2.0'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: lutaml-model
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - "~>"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: 0.8.0
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: 0.8.0
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: uniword
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '0'
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - ">="
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '0'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: rake
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '0'
|
|
61
|
+
type: :development
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - ">="
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '0'
|
|
68
|
+
- !ruby/object:Gem::Dependency
|
|
69
|
+
name: rspec
|
|
70
|
+
requirement: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - ">="
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: '0'
|
|
75
|
+
type: :development
|
|
76
|
+
prerelease: false
|
|
77
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
+
requirements:
|
|
79
|
+
- - ">="
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: '0'
|
|
82
|
+
- !ruby/object:Gem::Dependency
|
|
83
|
+
name: rubocop
|
|
84
|
+
requirement: !ruby/object:Gem::Requirement
|
|
85
|
+
requirements:
|
|
86
|
+
- - ">="
|
|
87
|
+
- !ruby/object:Gem::Version
|
|
88
|
+
version: '0'
|
|
89
|
+
type: :development
|
|
90
|
+
prerelease: false
|
|
91
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
92
|
+
requirements:
|
|
93
|
+
- - ">="
|
|
94
|
+
- !ruby/object:Gem::Version
|
|
95
|
+
version: '0'
|
|
96
|
+
description: Provides OOXML (DOCX) to CoreModel transformation for the Coradoc document
|
|
97
|
+
transformation hub. Uses Uniword to read DOCX files and transforms the OOXML model
|
|
98
|
+
tree to Coradoc::CoreModel.
|
|
99
|
+
email:
|
|
100
|
+
- open.source@ribose.com
|
|
101
|
+
executables: []
|
|
102
|
+
extensions: []
|
|
103
|
+
extra_rdoc_files: []
|
|
104
|
+
files:
|
|
105
|
+
- README.adoc
|
|
106
|
+
- lib/coradoc/docx.rb
|
|
107
|
+
- lib/coradoc/docx/transform.rb
|
|
108
|
+
- lib/coradoc/docx/transform/context.rb
|
|
109
|
+
- lib/coradoc/docx/transform/from_core_model.rb
|
|
110
|
+
- lib/coradoc/docx/transform/numbering_resolver.rb
|
|
111
|
+
- lib/coradoc/docx/transform/ordered_content.rb
|
|
112
|
+
- lib/coradoc/docx/transform/rule.rb
|
|
113
|
+
- lib/coradoc/docx/transform/rule_registry.rb
|
|
114
|
+
- lib/coradoc/docx/transform/rules/bookmark_rule.rb
|
|
115
|
+
- lib/coradoc/docx/transform/rules/break_rule.rb
|
|
116
|
+
- lib/coradoc/docx/transform/rules/footnote_rule.rb
|
|
117
|
+
- lib/coradoc/docx/transform/rules/heading_rule.rb
|
|
118
|
+
- lib/coradoc/docx/transform/rules/hyperlink_rule.rb
|
|
119
|
+
- lib/coradoc/docx/transform/rules/image_rule.rb
|
|
120
|
+
- lib/coradoc/docx/transform/rules/list_item_rule.rb
|
|
121
|
+
- lib/coradoc/docx/transform/rules/math_rule.rb
|
|
122
|
+
- lib/coradoc/docx/transform/rules/paragraph_rule.rb
|
|
123
|
+
- lib/coradoc/docx/transform/rules/proof_error_rule.rb
|
|
124
|
+
- lib/coradoc/docx/transform/rules/run_rule.rb
|
|
125
|
+
- lib/coradoc/docx/transform/rules/simple_field_rule.rb
|
|
126
|
+
- lib/coradoc/docx/transform/rules/structured_document_tag_rule.rb
|
|
127
|
+
- lib/coradoc/docx/transform/rules/table_rule.rb
|
|
128
|
+
- lib/coradoc/docx/transform/rules/text_rule.rb
|
|
129
|
+
- lib/coradoc/docx/transform/style_resolver.rb
|
|
130
|
+
- lib/coradoc/docx/transform/to_core_model.rb
|
|
131
|
+
- lib/coradoc/docx/version.rb
|
|
132
|
+
homepage: https://github.com/lutaml/coradoc
|
|
133
|
+
licenses:
|
|
134
|
+
- MIT
|
|
135
|
+
metadata:
|
|
136
|
+
homepage_uri: https://github.com/lutaml/coradoc
|
|
137
|
+
source_code_uri: https://github.com/lutaml/coradoc
|
|
138
|
+
rdoc_options: []
|
|
139
|
+
require_paths:
|
|
140
|
+
- lib
|
|
141
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
142
|
+
requirements:
|
|
143
|
+
- - ">="
|
|
144
|
+
- !ruby/object:Gem::Version
|
|
145
|
+
version: 3.1.0
|
|
146
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
147
|
+
requirements:
|
|
148
|
+
- - ">="
|
|
149
|
+
- !ruby/object:Gem::Version
|
|
150
|
+
version: '0'
|
|
151
|
+
requirements: []
|
|
152
|
+
rubygems_version: 3.6.9
|
|
153
|
+
specification_version: 4
|
|
154
|
+
summary: DOCX (OOXML) format support for Coradoc
|
|
155
|
+
test_files: []
|