exwiw 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +38 -0
- data/lib/exwiw/adapter/mongodb_adapter.rb +73 -5
- data/lib/exwiw/adapter.rb +12 -0
- data/lib/exwiw/embedded_in.rb +14 -0
- data/lib/exwiw/mongodb_collection_config.rb +26 -0
- data/lib/exwiw/runner.rb +7 -4
- data/lib/exwiw/version.rb +1 -1
- data/lib/exwiw.rb +1 -0
- metadata +2 -2
- data/.rubocop.yml +0 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4d94b3d27454accfa118d2ee8196f8df53ad026de2cce65c23d783b80ff9320d
|
|
4
|
+
data.tar.gz: 4292c5dca37b34d9a40892440603df62e77e8de62b68375e2990102da83c08f6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0cae5f397aff3258f7115625e2828d17579b754982287117384257d9858c5867d063b200954d978e54a95e86c5edf919203084f0913fdfdc2a156fde3f71d1cc
|
|
7
|
+
data.tar.gz: 41705e1dbcb3a9664e4fdeeaacd1da6b49f35131b1d03191adfdcc8c101bd348bc22d96fc4ebc6daf68fa67b4b70ce68a8f29eecdd986021b0ef8e9685191331
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [0.1.7] - 2026-05-14
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Add embedded document support to the MongoDB adapter via `embedded_in: { collection_name, path }`. Embedded configs are not dumped as their own jsonl; their `replace_with` rules apply to subdocuments (Array or Hash, with multi-level nesting) inside the parent collection.
|
|
10
|
+
|
|
11
|
+
## [0.1.6] - 2026-03-14
|
|
12
|
+
|
|
5
13
|
### Added
|
|
6
14
|
|
|
7
15
|
- Add `bulk_insert_chunk_size` table config to split the generated `INSERT` statement into chunks of the specified size. ([#8](https://github.com/riseshia/exwiw/pull/8))
|
data/README.md
CHANGED
|
@@ -186,6 +186,44 @@ The MongoDB adapter is experimental. To use it:
|
|
|
186
186
|
- `raw_sql` is not supported (the `MongodbField` schema does not declare it; any `raw_sql` keys in scenario JSON are silently dropped on load). Use `replace_with` for masking.
|
|
187
187
|
- The MongoDB adapter does not support the collection-level `filter` field (it raises `NotImplementedError` if set, since the SQL-string filter cannot be applied to MongoDB).
|
|
188
188
|
|
|
189
|
+
#### Embedded documents
|
|
190
|
+
|
|
191
|
+
MongoDB models often store one-to-many relationships as embedded subdocument arrays (e.g. `users` documents with a `posts: [...]` field). To mask fields inside embedded subdocuments, declare a separate config with `embedded_in`:
|
|
192
|
+
|
|
193
|
+
```jsonc
|
|
194
|
+
// scenario/users.json — top-level collection
|
|
195
|
+
{
|
|
196
|
+
"name": "users",
|
|
197
|
+
"primary_key": "_id",
|
|
198
|
+
"belongs_tos": [{ "table_name": "shops", "foreign_key": "shop_id" }],
|
|
199
|
+
"fields": [
|
|
200
|
+
{ "name": "_id" },
|
|
201
|
+
{ "name": "name", "replace_with": "masked{_id}" },
|
|
202
|
+
{ "name": "shop_id" }
|
|
203
|
+
]
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
// scenario/posts.json — embedded under users.posts
|
|
207
|
+
{
|
|
208
|
+
"name": "posts",
|
|
209
|
+
"primary_key": "_id",
|
|
210
|
+
"embedded_in": { "collection_name": "users", "path": "posts" },
|
|
211
|
+
"belongs_tos": [],
|
|
212
|
+
"fields": [
|
|
213
|
+
{ "name": "_id" },
|
|
214
|
+
{ "name": "title", "replace_with": "masked-{_id}" }
|
|
215
|
+
]
|
|
216
|
+
}
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
At runtime:
|
|
220
|
+
|
|
221
|
+
- `posts` is **not** dumped as its own jsonl file. Its `replace_with` rules are applied to the subdocuments inside the parent `users` document at the path `posts`.
|
|
222
|
+
- `path` accepts dot-separated paths for nested fields (e.g. `"profile.contacts"`).
|
|
223
|
+
- Both arrays of subdocuments and a single Hash subdocument at `path` are supported. Multiple levels of nesting work via embedded chains.
|
|
224
|
+
- Cross-collection references from inside an embedded subdocument (`belongs_tos` on an embedded config) are not supported and raise `ArgumentError` on load.
|
|
225
|
+
- Specifying an embedded config as `--target-table` raises `NotImplementedError`; pass the top-level collection name instead.
|
|
226
|
+
|
|
189
227
|
## How it works
|
|
190
228
|
|
|
191
229
|
- Load the table information from the specified config file.
|
data/lib/exwiw/adapter/mongodb_adapter.rb
CHANGED
|
@@ -3,10 +3,9 @@
|
|
|
3
3
|
require 'json'
|
|
4
4
|
|
|
5
5
|
# NOTE: This adapter consumes MongodbCollectionConfig (`fields` instead of
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
# follow references that live inside embedded structures.
|
|
6
|
+
# `columns`, plus `embedded_in`). Top-level collections are dumped as one
|
|
7
|
+
# jsonl per collection; configs marked `embedded_in` are not dumped on their
|
|
8
|
+
# own — their masking rules apply to subdocuments inside the parent.
|
|
10
9
|
module Exwiw
|
|
11
10
|
module Adapter
|
|
12
11
|
class MongodbAdapter < Base
|
|
@@ -19,8 +18,32 @@ module Exwiw
|
|
|
19
18
|
@state = {}
|
|
20
19
|
end
|
|
21
20
|
|
|
22
|
-
def
|
|
21
|
+
def dumpable?(config)
|
|
22
|
+
!config.embedded?
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def validate_as_dump_target!(config)
|
|
26
|
+
return unless config.embedded?
|
|
27
|
+
|
|
28
|
+
raise NotImplementedError,
|
|
29
|
+
"dump_target '#{config.name}' is an embedded MongodbCollectionConfig; " \
|
|
30
|
+
"specify a top-level collection instead."
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def build_query(config, dump_target, config_by_name)
|
|
34
|
+
if config.embedded?
|
|
35
|
+
raise NotImplementedError,
|
|
36
|
+
"MongodbAdapter#build_query was called with embedded config '#{config.name}'. " \
|
|
37
|
+
"Embedded configs are masked through the parent collection."
|
|
38
|
+
end
|
|
39
|
+
|
|
23
40
|
reject_filter!(config)
|
|
41
|
+
# Stash the embedded-children index for the matching to_bulk_insert call
|
|
42
|
+
# below. The Adapter contract does not pass config_by_name to
|
|
43
|
+
# to_bulk_insert (SQL adapters don't need it), so we rely on the Runner
|
|
44
|
+
# invariant that build_query is always called before to_bulk_insert for
|
|
45
|
+
# the same config.
|
|
46
|
+
@embedded_children_by_parent = index_embedded_children(config_by_name)
|
|
24
47
|
|
|
25
48
|
filter =
|
|
26
49
|
if config.name == dump_target.table_name
|
|
@@ -57,9 +80,15 @@ module Exwiw
|
|
|
57
80
|
docs
|
|
58
81
|
end
|
|
59
82
|
|
|
83
|
+
# NOTE: relies on @embedded_children_by_parent set by a prior build_query
|
|
84
|
+
# call for the same config. This implicit ordering exists because the
|
|
85
|
+
# Adapter contract intentionally does not thread config_by_name through
|
|
86
|
+
# to_bulk_insert (SQL adapters don't need it). Safe in Runner, fragile in
|
|
87
|
+
# tests — call build_query first.
|
|
60
88
|
def to_bulk_insert(rows, config)
|
|
61
89
|
rows.map do |doc|
|
|
62
90
|
apply_replace_with!(doc, config)
|
|
91
|
+
apply_embedded_masking!(doc, config)
|
|
63
92
|
JSON.generate(extended_json(doc))
|
|
64
93
|
end.join("\n")
|
|
65
94
|
end
|
|
@@ -96,6 +125,14 @@ module Exwiw
|
|
|
96
125
|
"collection-level `filter` is not supported by MongodbAdapter (collection: #{config.name})"
|
|
97
126
|
end
|
|
98
127
|
|
|
128
|
+
private def index_embedded_children(config_by_name)
|
|
129
|
+
config_by_name.each_value.with_object({}) do |child, acc|
|
|
130
|
+
next unless child.embedded?
|
|
131
|
+
|
|
132
|
+
(acc[child.embedded_in.collection_name] ||= []) << child
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
|
|
99
136
|
private def build_projection(config)
|
|
100
137
|
projection = {}
|
|
101
138
|
# Always include primary key so masking templates referencing it work,
|
|
@@ -104,6 +141,11 @@ module Exwiw
|
|
|
104
141
|
config.fields.each do |field|
|
|
105
142
|
projection[field.name] = 1
|
|
106
143
|
end
|
|
144
|
+
# Pull in paths owned by configs that mark themselves embedded in this
|
|
145
|
+
# collection, so the masker sees the subdocuments.
|
|
146
|
+
embedded_children_of(config).each do |child|
|
|
147
|
+
projection[child.embedded_in.path] = 1
|
|
148
|
+
end
|
|
107
149
|
projection
|
|
108
150
|
end
|
|
109
151
|
|
|
@@ -118,6 +160,32 @@ module Exwiw
|
|
|
118
160
|
end
|
|
119
161
|
end
|
|
120
162
|
|
|
163
|
+
private def apply_embedded_masking!(doc, parent_config)
|
|
164
|
+
embedded_children_of(parent_config).each do |child|
|
|
165
|
+
walk(doc, child.embedded_in.path) do |subdoc|
|
|
166
|
+
apply_replace_with!(subdoc, child)
|
|
167
|
+
apply_embedded_masking!(subdoc, child)
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
private def embedded_children_of(parent_config)
|
|
173
|
+
@embedded_children_by_parent.fetch(parent_config.name, [])
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
private def walk(doc, dotted_path)
|
|
177
|
+
segments = dotted_path.split(".")
|
|
178
|
+
*prefix, last = segments
|
|
179
|
+
container = prefix.reduce(doc) { |acc, seg| acc.is_a?(Hash) ? acc[seg] : nil }
|
|
180
|
+
return unless container.is_a?(Hash)
|
|
181
|
+
|
|
182
|
+
value = container[last]
|
|
183
|
+
case value
|
|
184
|
+
when Array then value.each { |sub| yield sub if sub.is_a?(Hash) }
|
|
185
|
+
when Hash then yield value
|
|
186
|
+
end
|
|
187
|
+
end
|
|
188
|
+
|
|
121
189
|
private def extended_json(doc)
|
|
122
190
|
if doc.respond_to?(:as_extended_json)
|
|
123
191
|
doc.as_extended_json(mode: :relaxed)
|
data/lib/exwiw/adapter.rb
CHANGED
|
@@ -34,6 +34,18 @@ module Exwiw
|
|
|
34
34
|
def supports_bulk_delete?
|
|
35
35
|
true
|
|
36
36
|
end
|
|
37
|
+
|
|
38
|
+
# Whether the given config produces its own dump output and needs an
|
|
39
|
+
# independent processing pass. SQL adapters always do; non-SQL adapters
|
|
40
|
+
# may exclude e.g. embedded subdocument configs.
|
|
41
|
+
def dumpable?(_config)
|
|
42
|
+
true
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Hook for adapter-specific validation when this config is used as the
|
|
46
|
+
# dump_target. Default: nothing to validate.
|
|
47
|
+
def validate_as_dump_target!(_config)
|
|
48
|
+
end
|
|
37
49
|
end
|
|
38
50
|
|
|
39
51
|
# @params [Exwiw::QueryAst] query_ast
|
data/lib/exwiw/mongodb_collection_config.rb
CHANGED
|
@@ -14,8 +14,34 @@ module Exwiw
|
|
|
14
14
|
attribute :fields, array(MongodbField)
|
|
15
15
|
attribute :bulk_insert_chunk_size, optional(Integer), skip_serializing_if_nil: true
|
|
16
16
|
|
|
17
|
+
# Marks this config as physically embedded inside another collection's
|
|
18
|
+
# documents. When set, this config is not processed as a standalone dump
|
|
19
|
+
# unit; its masking rules are applied to the parent's subdocuments at
|
|
20
|
+
# `path`.
|
|
21
|
+
attribute :embedded_in, optional(EmbeddedIn), skip_serializing_if_nil: true
|
|
22
|
+
|
|
23
|
+
def self.from(obj)
|
|
24
|
+
instance = super
|
|
25
|
+
instance.__send__(:validate_embedded!)
|
|
26
|
+
instance
|
|
27
|
+
end
|
|
28
|
+
|
|
17
29
|
def self.from_symbol_keys(hash)
|
|
18
30
|
from(JSON.parse(hash.to_json))
|
|
19
31
|
end
|
|
32
|
+
|
|
33
|
+
def embedded?
|
|
34
|
+
!embedded_in.nil?
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
private def validate_embedded!
|
|
38
|
+
return unless embedded?
|
|
39
|
+
return if belongs_tos.empty?
|
|
40
|
+
|
|
41
|
+
raise ArgumentError,
|
|
42
|
+
"MongodbCollectionConfig '#{name}' is embedded_in '#{embedded_in.collection_name}'; " \
|
|
43
|
+
"belongs_tos must be empty (cross-collection refs from inside embedded arrays " \
|
|
44
|
+
"are not supported)."
|
|
45
|
+
end
|
|
20
46
|
end
|
|
21
47
|
end
|
data/lib/exwiw/runner.rb
CHANGED
|
@@ -20,17 +20,20 @@ module Exwiw
|
|
|
20
20
|
|
|
21
21
|
def run
|
|
22
22
|
adapter = Adapter.build(@connection_config, @logger)
|
|
23
|
-
|
|
23
|
+
configs = load_table_config(adapter.class.table_config_class)
|
|
24
|
+
|
|
25
|
+
table_by_name = configs.each_with_object({}) { |config, hash| hash[config.name] = config }
|
|
26
|
+
|
|
27
|
+
target = table_by_name[@dump_target.table_name]
|
|
28
|
+
adapter.validate_as_dump_target!(target) if target
|
|
24
29
|
|
|
25
30
|
@logger.info("Determining table processing order...")
|
|
26
|
-
ordered_table_names = DetermineTableProcessingOrder.run(tables)
|
|
31
|
+
ordered_table_names = DetermineTableProcessingOrder.run(configs.select { |c| adapter.dumpable?(c) })
|
|
27
32
|
|
|
28
33
|
if !Dir.exist?(@output_dir)
|
|
29
34
|
FileUtils.mkdir_p(@output_dir)
|
|
30
35
|
end
|
|
31
36
|
|
|
32
|
-
table_by_name = tables.each_with_object({}) { |table, hash| hash[table.name] = table }
|
|
33
|
-
|
|
34
37
|
total_size = ordered_table_names.size
|
|
35
38
|
ordered_table_names.each_with_index do |table_name, idx|
|
|
36
39
|
@logger.info("Processing table '#{table_name}'... (#{idx + 1}/#{total_size})")
|
data/lib/exwiw/version.rb
CHANGED
data/lib/exwiw.rb
CHANGED
|
@@ -8,6 +8,7 @@ require "serdes"
|
|
|
8
8
|
require_relative "exwiw/belongs_to"
|
|
9
9
|
require_relative "exwiw/table_column"
|
|
10
10
|
require_relative "exwiw/table_config"
|
|
11
|
+
require_relative "exwiw/embedded_in"
|
|
11
12
|
require_relative "exwiw/mongodb_field"
|
|
12
13
|
require_relative "exwiw/mongodb_collection_config"
|
|
13
14
|
require_relative "exwiw/adapter"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: exwiw
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.6
|
|
4
|
+
version: 0.1.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Shia
|
|
@@ -32,7 +32,6 @@ executables:
|
|
|
32
32
|
extensions: []
|
|
33
33
|
extra_rdoc_files: []
|
|
34
34
|
files:
|
|
35
|
-
- ".rubocop.yml"
|
|
36
35
|
- CHANGELOG.md
|
|
37
36
|
- LICENSE.txt
|
|
38
37
|
- README.md
|
|
@@ -46,6 +45,7 @@ files:
|
|
|
46
45
|
- lib/exwiw/belongs_to.rb
|
|
47
46
|
- lib/exwiw/cli.rb
|
|
48
47
|
- lib/exwiw/determine_table_processing_order.rb
|
|
48
|
+
- lib/exwiw/embedded_in.rb
|
|
49
49
|
- lib/exwiw/mongo_query.rb
|
|
50
50
|
- lib/exwiw/mongodb_collection_config.rb
|
|
51
51
|
- lib/exwiw/mongodb_field.rb
|